From 7eb4775e43a869d01bbf8f48e6f212ee2b5b3529 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 26 Oct 2021 14:05:51 -0400 Subject: [PATCH 001/117] Bridge & Connection stats: cleanup We got a little heavy with stats over the years and it's cumbersome to add anything here (e.g. another `out` parameter). Instead of adding yet more stats in upcoming bits as-is, I decided to take a stab at simplifying this with `readonly struct` passes. We're not _super_ concerned with efficiency in the exception path but hey, why not. This should simplify maintenance/additions and clarify what each property is. Note that we have some static defaults here because `default` does _not_ run property initializers where a `new()` does. --- Directory.Build.props | 2 +- .../ConnectionMultiplexer.cs | 6 +- src/StackExchange.Redis/ExceptionFactory.cs | 34 +++---- src/StackExchange.Redis/Hacks.cs | 12 +++ src/StackExchange.Redis/PhysicalBridge.cs | 59 ++++++++----- src/StackExchange.Redis/PhysicalConnection.cs | 88 ++++++++++++++++--- src/StackExchange.Redis/ServerEndPoint.cs | 16 ++-- 7 files changed, 154 insertions(+), 63 deletions(-) create mode 100644 src/StackExchange.Redis/Hacks.cs diff --git a/Directory.Build.props b/Directory.Build.props index d43bc25dd..e66dd6b0a 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -15,7 +15,7 @@ https://github.com/StackExchange/StackExchange.Redis/ MIT - 8.0 + 10.0 git https://github.com/StackExchange/StackExchange.Redis/ diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index 58e6252d3..0c542b893 100644 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -1779,8 +1779,10 @@ internal async Task ReconfigureAsync(bool first, bool reconfigureAll, LogP { var server = servers[i]; var task = available[i]; - server.GetOutstandingCount(RedisCommand.PING, out int inst, out int qs, out long @in, out int qu, out bool aw, out 
long toRead, out long toWrite, out var bs, out var rs, out var ws); - log?.WriteLine($" Server[{i}] ({Format.ToString(server)}) Status: {task.Status} (inst: {inst}, qs: {qs}, in: {@in}, qu: {qu}, aw: {aw}, in-pipe: {toRead}, out-pipe: {toWrite}, bw: {bs}, rs: {rs}. ws: {ws})"); + var bs = server.GetBridgeStatus(RedisCommand.PING); + + //out int inst, out int qs, out long @in, out int qu, out bool aw, out long toRead, out long toWrite, out var bs, out var rs, out var ws); + log?.WriteLine($" Server[{i}] ({Format.ToString(server)}) Status: {task.Status} (inst: {bs.MessagesSinceLastHeartbeat}, qs: {bs.Connection.MessagesSentAwaitingResponse}, in: {bs.Connection.BytesAvailableOnSocket}, qu: {bs.MessagesSinceLastHeartbeat}, aw: {bs.IsWriterActive}, in-pipe: {bs.Connection.BytesInReadPipe}, out-pipe: {bs.Connection.BytesInWritePipe}, bw: {bs.BacklogStatus}, rs: {bs.Connection.ReadStatus}. ws: {bs.Connection.WriteStatus})"); } } diff --git a/src/StackExchange.Redis/ExceptionFactory.cs b/src/StackExchange.Redis/ExceptionFactory.cs index 661ce29b8..4cc274d24 100644 --- a/src/StackExchange.Redis/ExceptionFactory.cs +++ b/src/StackExchange.Redis/ExceptionFactory.cs @@ -312,27 +312,31 @@ ServerEndPoint server // Add server data, if we have it if (server != null && message != null) { - server.GetOutstandingCount(message.Command, out int inst, out int qs, out long @in, out int qu, out bool aw, out long toRead, out long toWrite, out var bs, out var rs, out var ws); - switch (rs) + var bs = server.GetBridgeStatus(message.Command); + + switch (bs.Connection.ReadStatus) { case PhysicalConnection.ReadStatus.CompletePendingMessageAsync: case PhysicalConnection.ReadStatus.CompletePendingMessageSync: sb.Append(" ** possible thread-theft indicated; see https://stackexchange.github.io/StackExchange.Redis/ThreadTheft ** "); break; } - Add(data, sb, "OpsSinceLastHeartbeat", "inst", inst.ToString()); - Add(data, sb, "Queue-Awaiting-Write", "qu", qu.ToString()); - Add(data, sb, 
"Queue-Awaiting-Response", "qs", qs.ToString()); - Add(data, sb, "Active-Writer", "aw", aw.ToString()); - if (qu != 0) Add(data, sb, "Backlog-Writer", "bw", bs.ToString()); - if (rs != PhysicalConnection.ReadStatus.NA) Add(data, sb, "Read-State", "rs", rs.ToString()); - if (ws != PhysicalConnection.WriteStatus.NA) Add(data, sb, "Write-State", "ws", ws.ToString()); - - if (@in >= 0) Add(data, sb, "Inbound-Bytes", "in", @in.ToString()); - if (toRead >= 0) Add(data, sb, "Inbound-Pipe-Bytes", "in-pipe", toRead.ToString()); - if (toWrite >= 0) Add(data, sb, "Outbound-Pipe-Bytes", "out-pipe", toWrite.ToString()); - - if (multiplexer.StormLogThreshold >= 0 && qs >= multiplexer.StormLogThreshold && Interlocked.CompareExchange(ref multiplexer.haveStormLog, 1, 0) == 0) + Add(data, sb, "OpsSinceLastHeartbeat", "inst", bs.MessagesSinceLastHeartbeat.ToString()); + Add(data, sb, "Queue-Awaiting-Write", "qu", bs.BacklogMessagesPending.ToString()); + Add(data, sb, "Queue-Awaiting-Response", "qs", bs.Connection.MessagesSentAwaitingResponse.ToString()); + Add(data, sb, "Active-Writer", "aw", bs.IsWriterActive.ToString()); + if (bs.BacklogMessagesPending != 0) + { + Add(data, sb, "Backlog-Writer", "bw", bs.BacklogStatus.ToString()); + } + if (bs.Connection.ReadStatus != PhysicalConnection.ReadStatus.NA) Add(data, sb, "Read-State", "rs", bs.Connection.ReadStatus.ToString()); + if (bs.Connection.WriteStatus != PhysicalConnection.WriteStatus.NA) Add(data, sb, "Write-State", "ws", bs.Connection.WriteStatus.ToString()); + + if (bs.Connection.BytesAvailableOnSocket >= 0) Add(data, sb, "Inbound-Bytes", "in", bs.Connection.BytesAvailableOnSocket.ToString()); + if (bs.Connection.BytesInReadPipe >= 0) Add(data, sb, "Inbound-Pipe-Bytes", "in-pipe", bs.Connection.BytesInReadPipe.ToString()); + if (bs.Connection.BytesInWritePipe >= 0) Add(data, sb, "Outbound-Pipe-Bytes", "out-pipe", bs.Connection.BytesInWritePipe.ToString()); + + if (multiplexer.StormLogThreshold >= 0 && 
bs.Connection.MessagesSentAwaitingResponse >= multiplexer.StormLogThreshold && Interlocked.CompareExchange(ref multiplexer.haveStormLog, 1, 0) == 0) { var log = server.GetStormLog(message.Command); if (string.IsNullOrWhiteSpace(log)) Interlocked.Exchange(ref multiplexer.haveStormLog, 0); diff --git a/src/StackExchange.Redis/Hacks.cs b/src/StackExchange.Redis/Hacks.cs new file mode 100644 index 000000000..411a796d5 --- /dev/null +++ b/src/StackExchange.Redis/Hacks.cs @@ -0,0 +1,12 @@ +#if !NET5_0_OR_GREATER + +// To support { get; init; } properties +using System.ComponentModel; + +namespace System.Runtime.CompilerServices +{ + [EditorBrowsable(EditorBrowsableState.Never)] + internal static class IsExternalInit { } +} + +#endif diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 4fe86900d..079d20904 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -281,30 +281,47 @@ private async Task ExecuteSubscriptionLoop() // pushes items that have been enqu internal bool TryEnqueueBackgroundSubscriptionWrite(in PendingSubscriptionState state) => !isDisposed && (_subscriptionBackgroundQueue ?? 
GetSubscriptionQueue()).Writer.TryWrite(state); - internal void GetOutstandingCount(out int inst, out int qs, out long @in, out int qu, out bool aw, out long toRead, out long toWrite, - out BacklogStatus bs, out PhysicalConnection.ReadStatus rs, out PhysicalConnection.WriteStatus ws) + internal readonly struct BridgeStatus { - inst = (int)(Interlocked.Read(ref operationCount) - Interlocked.Read(ref profileLastLog)); - qu = _backlog.Count; - aw = !_singleWriterMutex.IsAvailable; - bs = _backlogStatus; - var tmp = physical; - if (tmp == null) - { - qs = 0; - toRead = toWrite = @in = -1; - rs = PhysicalConnection.ReadStatus.NA; - ws = PhysicalConnection.WriteStatus.NA; - } - else - { - qs = tmp.GetSentAwaitingResponseCount(); - @in = tmp.GetSocketBytes(out toRead, out toWrite); - rs = tmp.GetReadStatus(); - ws = tmp.GetWriteStatus(); - } + /// + /// Number of messages sent since the last heartbeat was processed. + /// + public int MessagesSinceLastHeartbeat { get; init; } + /// + /// Whether the pipe writer is currently active. + /// + public bool IsWriterActive { get; init; } + + /// + /// Total number of backlog messages that are in the retry backlog. + /// + public int BacklogMessagesPending { get; init; } + /// + /// Status of the currently processing backlog, if any. + /// + public BacklogStatus BacklogStatus { get; init; } + + /// + /// Status foor the underlying . + /// + public PhysicalConnection.ConnectionStatus Connection { get; init; } = PhysicalConnection.ConnectionStatus.Default; + + /// + /// The default bridge stats, notable *not* the same as default since initializers don't run. 
+ /// + public static BridgeStatus Zero { get; } = new() { Connection = PhysicalConnection.ConnectionStatus.Zero }; } + internal BridgeStatus GetStatus() => new() + { + MessagesSinceLastHeartbeat = (int)(Interlocked.Read(ref operationCount) - Interlocked.Read(ref profileLastLog)), + IsWriterActive = !_singleWriterMutex.IsAvailable, + BacklogMessagesPending = _backlogGeneral.Count + _backlogSpecificServer.Count + _backlogHandshake.Count, + BacklogStatus = _backlogStatus, + ActiveBacklog = _activeBacklog, + Connection = physical?.GetStatus() ?? PhysicalConnection.ConnectionStatus.Default, + }; + internal string GetStormLog() { var sb = new StringBuilder("Storm log for ").Append(Format.ToString(ServerEndPoint.EndPoint)).Append(" / ").Append(ConnectionType) diff --git a/src/StackExchange.Redis/PhysicalConnection.cs b/src/StackExchange.Redis/PhysicalConnection.cs index 9c2672eaa..4836fe1ad 100644 --- a/src/StackExchange.Redis/PhysicalConnection.cs +++ b/src/StackExchange.Redis/PhysicalConnection.cs @@ -330,7 +330,7 @@ public void RecordConnectionFailed(ConnectionFailureType failureType, Exception // stop anything new coming in... 
bridge?.Trace("Failed: " + failureType); - long @in = -1, @toRead = -1, @toWrite = -1; + ConnectionStatus connStatus = ConnectionStatus.Default; PhysicalBridge.State oldState = PhysicalBridge.State.Disconnected; bool isCurrent = false; bridge?.OnDisconnected(failureType, this, out isCurrent, out oldState); @@ -338,7 +338,7 @@ public void RecordConnectionFailed(ConnectionFailureType failureType, Exception { try { - @in = GetSocketBytes(out toRead, out toWrite); + connStatus = GetStatus(); } catch { /* best effort only */ } } @@ -408,9 +408,9 @@ void add(string lk, string sk, string v) add("Keep-Alive", "keep-alive", bridge.ServerEndPoint?.WriteEverySeconds + "s"); add("Previous-Physical-State", "state", oldState.ToString()); add("Manager", "mgr", bridge.Multiplexer.SocketManager?.GetState()); - if (@in >= 0) add("Inbound-Bytes", "in", @in.ToString()); - if (toRead >= 0) add("Inbound-Pipe-Bytes", "in-pipe", toRead.ToString()); - if (toWrite >= 0) add("Outbound-Pipe-Bytes", "out-pipe", toWrite.ToString()); + if (connStatus.BytesAvailableOnSocket >= 0) add("Inbound-Bytes", "in", connStatus.BytesAvailableOnSocket.ToString()); + if (connStatus.BytesInReadPipe >= 0) add("Inbound-Pipe-Bytes", "in-pipe", connStatus.BytesInReadPipe.ToString()); + if (connStatus.BytesInWritePipe >= 0) add("Outbound-Pipe-Bytes", "out-pipe", connStatus.BytesInWritePipe.ToString()); add("Last-Heartbeat", "last-heartbeat", (lastBeat == 0 ? "never" : ((unchecked(now - lastBeat) / 1000) + "s ago")) + (BridgeCouldBeNull.IsBeating ? " (mid-beat)" : "")); var mbeat = bridge.Multiplexer.LastHeartbeatSecondsAgo; @@ -1266,25 +1266,87 @@ internal static void WriteInteger(PipeWriter writer, long value) writer.Advance(bytes); } - internal long GetSocketBytes(out long readCount, out long writeCount) + internal readonly struct ConnectionStatus + { + /// + /// Number of messages sent outbound, but we don't yet have a response for. 
+ /// + public int MessagesSentAwaitingResponse { get; init; } + + /// + /// Bytes available on the socket, not yet read into the pipe. + /// + public long BytesAvailableOnSocket { get; init; } = -1; + /// + /// Bytes read from the socket, pending in the reader pipe. + /// + public long BytesInReadPipe { get; init; } = -1; + /// + /// Bytes in the writer pipe, waiting to be written to the socket. + /// + public long BytesInWritePipe { get; init; } = -1; + + /// + /// The inbound pipe reader status. + /// + public ReadStatus ReadStatus { get; init; } = ReadStatus.NA; + /// + /// The outbound pipe writer status. + /// + public WriteStatus WriteStatus { get; init; } = WriteStatus.NA; + + /// + /// The default connection stats, notable *not* the same as default since initializers don't run. + /// + public static ConnectionStatus Default { get; } = new(); + + /// + /// The zeroed connection stats, which we want to display as zero for default exception cases. + /// + public static ConnectionStatus Zero { get; } = new() + { + BytesAvailableOnSocket = 0, + BytesInReadPipe = 0, + BytesInWritePipe = 0 + }; + } + + public ConnectionStatus GetStatus() { if (_ioPipe is SocketConnection conn) { var counters = conn.GetCounters(); - readCount = counters.BytesWaitingToBeRead; - writeCount = counters.BytesWaitingToBeSent; - return counters.BytesAvailableOnSocket; + return new ConnectionStatus() + { + MessagesSentAwaitingResponse = GetSentAwaitingResponseCount(), + BytesAvailableOnSocket = counters.BytesAvailableOnSocket, + BytesInReadPipe = counters.BytesWaitingToBeRead, + BytesInWritePipe = counters.BytesWaitingToBeSent, + ReadStatus = _readStatus, + WriteStatus = _writeStatus, + }; } - readCount = writeCount = -1; + + // Fall back to bytes waiting on the socket if we can + int fallbackBytesAvailable; try - { - return VolatileSocket?.Available ?? -1; + { + fallbackBytesAvailable = VolatileSocket?.Available ?? 
-1; } catch { // If this fails, we're likely in a race disposal situation and do not want to blow sky high here. - return -1; + fallbackBytesAvailable = -1; } + + return new ConnectionStatus() + { + BytesAvailableOnSocket = fallbackBytesAvailable, + BytesInReadPipe = -1, + BytesInWritePipe = -1, + ReadStatus = _readStatus, + WriteStatus = _writeStatus, + }; } private static RemoteCertificateValidationCallback GetAmbientIssuerCertificateCallback() diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index f6050d4e5..70a1bdf4a 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -434,24 +434,18 @@ internal ServerCounters GetCounters() return counters; } - internal void GetOutstandingCount(RedisCommand command, out int inst, out int qs, out long @in, out int qu, out bool aw, out long toRead, out long toWrite, - out BacklogStatus bs, out PhysicalConnection.ReadStatus rs, out PhysicalConnection.WriteStatus ws) - { - inst = qs = qu = 0; - @in = toRead = toWrite = 0; - aw = false; - bs = BacklogStatus.Inactive; - rs = PhysicalConnection.ReadStatus.NA; - ws = PhysicalConnection.WriteStatus.NA; + internal BridgeStatus GetBridgeStatus(RedisCommand command) + { try { - var bridge = GetBridge(command, false); - bridge?.GetOutstandingCount(out inst, out qs, out @in, out qu, out aw, out toRead, out toWrite, out bs, out rs, out ws); + return GetBridge(command, false)?.GetStatus() ?? 
BridgeStatus.Zero; } catch (Exception ex) { // only needs to be best efforts System.Diagnostics.Debug.WriteLine(ex.Message); } + + return default; } internal string GetProfile() From b3038dff584806d34582b7fc42acbdedfdd27b5e Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 26 Oct 2021 14:11:53 -0400 Subject: [PATCH 002/117] Bump SDK versions --- .github/workflows/CI.yml | 8 ++++++++ appveyor.yml | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 5dacdc4d2..b3f1f058a 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -24,6 +24,10 @@ jobs: uses: actions/setup-dotnet@v1 with: dotnet-version: '5.0.x' + - name: Setup .NET 6.x + uses: actions/setup-dotnet@v1 + with: + dotnet-version: '6.0.x' - name: .NET Build run: dotnet build Build.csproj -c Release /p:CI=true - name: Start Redis Services (docker-compose) @@ -55,6 +59,10 @@ jobs: uses: actions/setup-dotnet@v1 with: dotnet-version: '5.0.x' + - name: Setup .NET 6.x + uses: actions/setup-dotnet@v1 + with: + dotnet-version: '6.0.x' - name: .NET Build run: dotnet build Build.csproj -c Release /p:CI=true - name: Start Redis Services (v3.0.503) diff --git a/appveyor.yml b/appveyor.yml index 2cbf38d46..2044b26bc 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -6,7 +6,7 @@ init: install: - cmd: >- - choco install dotnet-sdk --version 5.0.100 + choco install dotnet-sdk --version 6.0.100-rc22150557 cd tests\RedisConfigs\3.0.503 From 50def474412e7261b18d960e3bbf94c919bd67e7 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 26 Oct 2021 14:13:54 -0400 Subject: [PATCH 003/117] Dammit --- .github/workflows/CI.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index b3f1f058a..c2865fc43 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -28,6 +28,7 @@ jobs: uses: actions/setup-dotnet@v1 with: dotnet-version: '6.0.x' + include-prerelease: true - name: .NET 
Build run: dotnet build Build.csproj -c Release /p:CI=true - name: Start Redis Services (docker-compose) @@ -63,6 +64,7 @@ jobs: uses: actions/setup-dotnet@v1 with: dotnet-version: '6.0.x' + include-prerelease: true - name: .NET Build run: dotnet build Build.csproj -c Release /p:CI=true - name: Start Redis Services (v3.0.503) From 69d907ca3e1e5bd5e190c7baf3ddc0b714818116 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 26 Oct 2021 14:18:55 -0400 Subject: [PATCH 004/117] Let's try this... --- .github/workflows/CI.yml | 8 -------- appveyor.yml | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index c2865fc43..2a3fc1af3 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -20,10 +20,6 @@ jobs: uses: actions/setup-dotnet@v1 with: dotnet-version: '3.1.x' - - name: Setup .NET 5.x - uses: actions/setup-dotnet@v1 - with: - dotnet-version: '5.0.x' - name: Setup .NET 6.x uses: actions/setup-dotnet@v1 with: @@ -56,10 +52,6 @@ jobs: uses: actions/setup-dotnet@v1 with: dotnet-version: '3.1.x' - - name: Setup .NET 5.x - uses: actions/setup-dotnet@v1 - with: - dotnet-version: '5.0.x' - name: Setup .NET 6.x uses: actions/setup-dotnet@v1 with: diff --git a/appveyor.yml b/appveyor.yml index 2044b26bc..85d0c28cd 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -6,7 +6,7 @@ init: install: - cmd: >- - choco install dotnet-sdk --version 6.0.100-rc22150557 + choco install dotnet-sdk --version "6.0.100-rc22150557" --pre cd tests\RedisConfigs\3.0.503 From 133c4bd6e65d557e330b8770d59caefa1a0116e5 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 26 Oct 2021 14:43:24 -0400 Subject: [PATCH 005/117] C# 10 is too hard. 
--- .github/workflows/CI.yml | 10 ++++---- Directory.Build.props | 2 +- appveyor.yml | 2 +- src/StackExchange.Redis/PhysicalBridge.cs | 2 +- src/StackExchange.Redis/PhysicalConnection.cs | 23 +++++++++++++------ 5 files changed, 23 insertions(+), 16 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 2a3fc1af3..5dacdc4d2 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -20,11 +20,10 @@ jobs: uses: actions/setup-dotnet@v1 with: dotnet-version: '3.1.x' - - name: Setup .NET 6.x + - name: Setup .NET 5.x uses: actions/setup-dotnet@v1 with: - dotnet-version: '6.0.x' - include-prerelease: true + dotnet-version: '5.0.x' - name: .NET Build run: dotnet build Build.csproj -c Release /p:CI=true - name: Start Redis Services (docker-compose) @@ -52,11 +51,10 @@ jobs: uses: actions/setup-dotnet@v1 with: dotnet-version: '3.1.x' - - name: Setup .NET 6.x + - name: Setup .NET 5.x uses: actions/setup-dotnet@v1 with: - dotnet-version: '6.0.x' - include-prerelease: true + dotnet-version: '5.0.x' - name: .NET Build run: dotnet build Build.csproj -c Release /p:CI=true - name: Start Redis Services (v3.0.503) diff --git a/Directory.Build.props b/Directory.Build.props index e66dd6b0a..6227de316 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -15,7 +15,7 @@ https://github.com/StackExchange/StackExchange.Redis/ MIT - 10.0 + 9.0 git https://github.com/StackExchange/StackExchange.Redis/ diff --git a/appveyor.yml b/appveyor.yml index 85d0c28cd..2cbf38d46 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -6,7 +6,7 @@ init: install: - cmd: >- - choco install dotnet-sdk --version "6.0.100-rc22150557" --pre + choco install dotnet-sdk --version 5.0.100 cd tests\RedisConfigs\3.0.503 diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 079d20904..85a79b6d6 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -304,7 +304,7 @@ 
internal readonly struct BridgeStatus /// /// Status foor the underlying . /// - public PhysicalConnection.ConnectionStatus Connection { get; init; } = PhysicalConnection.ConnectionStatus.Default; + public PhysicalConnection.ConnectionStatus Connection { get; init; } /// /// The default bridge stats, notable *not* the same as default since initializers don't run. diff --git a/src/StackExchange.Redis/PhysicalConnection.cs b/src/StackExchange.Redis/PhysicalConnection.cs index 4836fe1ad..7b0096edc 100644 --- a/src/StackExchange.Redis/PhysicalConnection.cs +++ b/src/StackExchange.Redis/PhysicalConnection.cs @@ -1276,29 +1276,36 @@ internal readonly struct ConnectionStatus /// /// Bytes available on the socket, not yet read into the pipe. /// - public long BytesAvailableOnSocket { get; init; } = -1; + public long BytesAvailableOnSocket { get; init; } /// /// Bytes read from the socket, pending in the reader pipe. /// - public long BytesInReadPipe { get; init; } = -1; + public long BytesInReadPipe { get; init; } /// /// Bytes in the writer pipe, waiting to be written to the socket. /// - public long BytesInWritePipe { get; init; } = -1; + public long BytesInWritePipe { get; init; } /// /// The inbound pipe reader status. /// - public ReadStatus ReadStatus { get; init; } = ReadStatus.NA; + public ReadStatus ReadStatus { get; init; } /// /// The outbound pipe writer status. /// - public WriteStatus WriteStatus { get; init; } = WriteStatus.NA; + public WriteStatus WriteStatus { get; init; } /// /// The default connection stats, notable *not* the same as default since initializers don't run. /// - public static ConnectionStatus Default { get; } = new(); + public static ConnectionStatus Default { get; } = new() + { + BytesAvailableOnSocket = -1, + BytesInReadPipe = -1, + BytesInWritePipe = -1, + ReadStatus = ReadStatus.NA, + WriteStatus = WriteStatus.NA, + }; /// /// The zeroed connection stats, which we want to display as zero for default exception cases. 
@@ -1307,7 +1314,9 @@ internal readonly struct ConnectionStatus { BytesAvailableOnSocket = 0, BytesInReadPipe = 0, - BytesInWritePipe = 0 + BytesInWritePipe = 0, + ReadStatus = ReadStatus.NA, + WriteStatus = WriteStatus.NA, }; } From 4f4be8bc0a112e1fed2db1ee75839e4214886ec9 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 26 Oct 2021 14:48:09 -0400 Subject: [PATCH 006/117] Fully revert to non-branch --- src/StackExchange.Redis/PhysicalBridge.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 85a79b6d6..115e3c275 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -316,9 +316,8 @@ internal readonly struct BridgeStatus { MessagesSinceLastHeartbeat = (int)(Interlocked.Read(ref operationCount) - Interlocked.Read(ref profileLastLog)), IsWriterActive = !_singleWriterMutex.IsAvailable, - BacklogMessagesPending = _backlogGeneral.Count + _backlogSpecificServer.Count + _backlogHandshake.Count, + BacklogMessagesPending = _backlog.Count, BacklogStatus = _backlogStatus, - ActiveBacklog = _activeBacklog, Connection = physical?.GetStatus() ?? PhysicalConnection.ConnectionStatus.Default, }; From 265c0284110be8c1417ea2d54589fe2964ebd389 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 26 Oct 2021 17:54:42 -0400 Subject: [PATCH 007/117] Backlog; v3 implementation start This isn't working as we want yet in several regards but putting a progress commit in here. 
Overall: - Needs offload to a sibling connection (and option governing this) - Needs to fail much faster in the initial connection scenario - Lots of test love around the new functionality --- src/StackExchange.Redis/BacklogPolicy.cs | 43 ++ .../ConfigurationOptions.cs | 10 +- .../ConnectionMultiplexer.cs | 17 +- src/StackExchange.Redis/ExceptionFactory.cs | 1 + src/StackExchange.Redis/PhysicalBridge.cs | 376 +++++++++++------- src/StackExchange.Redis/ServerEndPoint.cs | 5 +- .../ServerSelectionStrategy.cs | 26 +- tests/StackExchange.Redis.Tests/AsyncTests.cs | 2 +- .../StackExchange.Redis.Tests/BacklogTests.cs | 55 +++ .../ConnectFailTimeout.cs | 2 +- .../ConnectingFailDetection.cs | 2 + tests/StackExchange.Redis.Tests/TestBase.cs | 10 +- 12 files changed, 388 insertions(+), 161 deletions(-) create mode 100644 src/StackExchange.Redis/BacklogPolicy.cs create mode 100644 tests/StackExchange.Redis.Tests/BacklogTests.cs diff --git a/src/StackExchange.Redis/BacklogPolicy.cs b/src/StackExchange.Redis/BacklogPolicy.cs new file mode 100644 index 000000000..b8111944d --- /dev/null +++ b/src/StackExchange.Redis/BacklogPolicy.cs @@ -0,0 +1,43 @@ +namespace StackExchange.Redis +{ + /// + /// The backlog policy to use for commands. This policy comes into effect when a connection is unhealthy or unavailable. + /// The policy can choose to backlog commands and wait to try them (within their timeout) against a connection when it comes up, + /// or it could choose to fail fast and throw ASAP. Different apps desire different behaviors with backpressure and how to handle + /// large amounts of load, so this is configurable to optimize the happy path but avoid spiral-of-death queue scenarios for others. + /// + public class BacklogPolicy + { + /// + /// Backlog behavior matching StackExchange.Redis's 2.x line, failing fast and not attempting to queue + /// and retry when a connection is available again. 
+ /// + public static BacklogPolicy FailFast = new() + { + QueueWhileDisconnected = false, + AbortPendingOnConnectionFailure = true, + }; + + /// + /// Default backlog policy which will allow commands to be issues against an endpoint and queue up. + /// Commands are still subject to their async timeout (which serves as a queue size check). + /// + public static BacklogPolicy Default = new() + { + QueueWhileDisconnected = true, + AbortPendingOnConnectionFailure = false, + }; + + /// + /// Whether to queue commands while disconnected. + /// True means queue for attempts up until their timeout. + /// False means to fail ASAP and queue nothing. + /// + public bool QueueWhileDisconnected { get; init; } + + /// + /// Whether to immediately abandon (with an exception) all pending commands when a connection goes unhealthy. + /// + public bool AbortPendingOnConnectionFailure { get; init; } + } +} diff --git a/src/StackExchange.Redis/ConfigurationOptions.cs b/src/StackExchange.Redis/ConfigurationOptions.cs index 44486acbc..470f6af03 100644 --- a/src/StackExchange.Redis/ConfigurationOptions.cs +++ b/src/StackExchange.Redis/ConfigurationOptions.cs @@ -147,6 +147,8 @@ public static string TryNormalize(string value) private IReconnectRetryPolicy reconnectRetryPolicy; + private BacklogPolicy backlogPolicy; + /// /// A LocalCertificateSelectionCallback delegate responsible for selecting the certificate used for authentication; note /// that this cannot be specified in the configuration-string. @@ -332,10 +334,15 @@ public bool PreserveAsyncOrder public Proxy Proxy { get { return proxy.GetValueOrDefault(); } set { proxy = value; } } /// - /// The retry policy to be used for connection reconnects + /// The retry policy to be used for connection reconnects. 
/// public IReconnectRetryPolicy ReconnectRetryPolicy { get { return reconnectRetryPolicy ??= new LinearRetry(ConnectTimeout); } set { reconnectRetryPolicy = value; } } + /// + /// The backlog policy to be used for commands when a connection is unhealthy. + /// + public BacklogPolicy BacklogPolicy { get => backlogPolicy ?? BacklogPolicy.Default; set => backlogPolicy = value; } + /// /// Indicates whether endpoints should be resolved via DNS before connecting. /// If enabled the ConnectionMultiplexer will not re-resolve DNS @@ -464,6 +471,7 @@ public ConfigurationOptions Clone() responseTimeout = responseTimeout, DefaultDatabase = DefaultDatabase, ReconnectRetryPolicy = reconnectRetryPolicy, + BacklogPolicy = backlogPolicy, SslProtocols = SslProtocols, checkCertificateRevocation = checkCertificateRevocation, }; diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index 0c542b893..be9d8643d 100644 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -801,7 +801,7 @@ internal void OnHashSlotMoved(int hashSlot, EndPoint old, EndPoint @new) /// The key to get a hash slot ID for. 
public int HashSlot(RedisKey key) => ServerSelectionStrategy.HashSlot(key); - internal ServerEndPoint AnyConnected(ServerType serverType, uint startOffset, RedisCommand command, CommandFlags flags) + internal ServerEndPoint AnyServer(ServerType serverType, uint startOffset, RedisCommand command, CommandFlags flags, bool allowDisconnected) { var tmp = GetServerSnapshot(); int len = tmp.Length; @@ -809,7 +809,7 @@ internal ServerEndPoint AnyConnected(ServerType serverType, uint startOffset, Re for (int i = 0; i < len; i++) { var server = tmp[(int)(((uint)i + startOffset) % len)]; - if (server != null && server.ServerType == serverType && server.IsSelectable(command)) + if (server != null && server.ServerType == serverType && server.IsSelectable(command, allowDisconnected)) { if (server.IsReplica) { @@ -2232,8 +2232,15 @@ private bool PrepareToPushMessageToBridge(Message message, ResultProcessor message.SetSource(processor, resultBox); if (server == null) - { // infer a server automatically + { + // Infer a server automatically server = SelectServer(message); + + // If we didn't find one successfully, and we're allowed, queue for any viable server + if (server == null && message != null && RawConfig.BacklogPolicy.QueueWhileDisconnected) + { + server = ServerSelectionStrategy.Select(message, allowDisconnected: true); + } } else // a server was specified; do we trust their choice, though? { @@ -2251,7 +2258,9 @@ private bool PrepareToPushMessageToBridge(Message message, ResultProcessor } break; } - if (!server.IsConnected) + + // If we're not allowed to queue while disconnected, we'll bomb out below. + if (!server.IsConnected && !RawConfig.BacklogPolicy.QueueWhileDisconnected) { // well, that's no use! 
server = null; diff --git a/src/StackExchange.Redis/ExceptionFactory.cs b/src/StackExchange.Redis/ExceptionFactory.cs index 4cc274d24..53ece65eb 100644 --- a/src/StackExchange.Redis/ExceptionFactory.cs +++ b/src/StackExchange.Redis/ExceptionFactory.cs @@ -328,6 +328,7 @@ ServerEndPoint server if (bs.BacklogMessagesPending != 0) { Add(data, sb, "Backlog-Writer", "bw", bs.BacklogStatus.ToString()); + Add(data, sb, "Backlog-Name", "abl", bs.ActiveBacklog.ToString()); } if (bs.Connection.ReadStatus != PhysicalConnection.ReadStatus.NA) Add(data, sb, "Read-State", "rs", bs.Connection.ReadStatus.ToString()); if (bs.Connection.WriteStatus != PhysicalConnection.WriteStatus.NA) Add(data, sb, "Write-State", "ws", bs.Connection.WriteStatus.ToString()); diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 115e3c275..3d00a4456 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -26,7 +26,22 @@ internal sealed class PhysicalBridge : IDisposable private readonly long[] profileLog = new long[ProfileLogSamples]; - private readonly ConcurrentQueue _backlog = new ConcurrentQueue(); + // We have 3 queues in play on this bridge, and things enter them in this order: + // General: for anything coming into the bridge. Everything but handshake commands goes into this queue. + // SpecificServer: for anything targeting this endpoint that cannot be handed off to another endpoint. + // Handshake: foor anything coming from our own handshake + // The queue priority order is reverse: + // 1. Handshake messages are sent first to re-establish the connection (e.g. AUTH) + // 2. Specific server messages are sent next (e.g. REPLICAOF - this queue is rare to start with) + // 3. All other messages are handled + // Note: this doesn't mean sent - if we have another viable endpoint and these messages can be sent on it, + // then we'll either send them to it. 
Any messages specifying this endpoint explicitly will go into the + // specific server queue so that we unblock the general FIFO queue for other handoffs. + // + private readonly ConcurrentQueue _backlogGeneral = new(), + _backlogSpecificServer = new(), + _backlogHandshake = new(); + private bool BacklogHasItems => !_backlogGeneral.IsEmpty || !_backlogSpecificServer.IsEmpty || !_backlogHandshake.IsEmpty; private int _backlogProcessorIsRunning = 0; private int activeWriters = 0; @@ -135,8 +150,8 @@ private WriteResult QueueOrFailMessage(Message message) // you can go in the queue, but we won't be starting // a worker, because the handshake has not completed message.SetEnqueued(null); - message.SetBacklogState(_backlog.Count, null); - _backlog.Enqueue(message); + message.SetBacklogState(_backlogGeneral.Count, null); + _backlogGeneral.Enqueue(message); return WriteResult.Success; // we'll take it... } else @@ -295,11 +310,29 @@ internal readonly struct BridgeStatus /// /// Total number of backlog messages that are in the retry backlog. /// - public int BacklogMessagesPending { get; init; } + public int BacklogMessagesPending => BacklogMessagesPendingGeneral + BacklogMessagesPendingSpecificServer + BacklogMessagesPendingHandshake; + + /// + /// The number of backlog messages that are in the retry queue. + /// + public int BacklogMessagesPendingGeneral { get; init; } + /// + /// The number of backlog messages that are in the retry queue. + /// + public int BacklogMessagesPendingSpecificServer { get; init; } + /// + /// The number of backlog messages that are in the retry queue. + /// + public int BacklogMessagesPendingHandshake { get; init; } + /// /// Status of the currently processing backlog, if any. /// public BacklogStatus BacklogStatus { get; init; } + /// + /// Name of the currently processing backlog, if any. + /// + public Backlog ActiveBacklog { get; init; } /// /// Status foor the underlying . 
@@ -316,8 +349,11 @@ internal readonly struct BridgeStatus { MessagesSinceLastHeartbeat = (int)(Interlocked.Read(ref operationCount) - Interlocked.Read(ref profileLastLog)), IsWriterActive = !_singleWriterMutex.IsAvailable, - BacklogMessagesPending = _backlog.Count, + BacklogMessagesPendingGeneral = _backlogGeneral.Count, + BacklogMessagesPendingSpecificServer = _backlogSpecificServer.Count, + BacklogMessagesPendingHandshake = _backlogHandshake.Count, BacklogStatus = _backlogStatus, + ActiveBacklog = _activeBacklog, Connection = physical?.GetStatus() ?? PhysicalConnection.ConnectionStatus.Default, }; @@ -415,7 +451,12 @@ internal void ResetNonConnected() internal void OnConnectionFailed(PhysicalConnection connection, ConnectionFailureType failureType, Exception innerException) { Trace($"OnConnectionFailed: {connection}"); - AbandonPendingBacklog(innerException); + // If we're configured to, fail all pending backlogged messages + if (Multiplexer.RawConfig.BacklogPolicy?.AbortPendingOnConnectionFailure == true) + { + AbandonPendingBacklog(innerException); + } + if (reportNextFailure) { LastException = innerException; @@ -463,12 +504,19 @@ internal void OnDisconnected(ConnectionFailureType failureType, PhysicalConnecti private void AbandonPendingBacklog(Exception ex) { - while (_backlog.TryDequeue(out Message next)) + // Drain both lower queues, but not handshake since that's likely to cause overlapping failure shenanigans. 
+ while (_backlogSpecificServer.TryDequeue(out Message next)) + { + Multiplexer?.OnMessageFaulted(next, ex); + next.SetExceptionAndComplete(ex, this); + } + while (_backlogGeneral.TryDequeue(out Message next)) { Multiplexer?.OnMessageFaulted(next, ex); next.SetExceptionAndComplete(ex, this); } } + internal void OnFullyEstablished(PhysicalConnection connection, string source) { Trace("OnFullyEstablished"); @@ -481,8 +529,7 @@ internal void OnFullyEstablished(PhysicalConnection connection, string source) ServerEndPoint.OnFullyEstablished(connection, source); // do we have pending system things to do? - bool createWorker = !_backlog.IsEmpty; - if (createWorker) StartBacklogProcessor(); + if (BacklogHasItems) StartBacklogProcessor(); if (ConnectionType == ConnectionType.Interactive) ServerEndPoint.CheckInfoReplication(); } @@ -500,7 +547,7 @@ internal void OnHeartbeat(bool ifConnectedOnly) bool runThisTime = false; try { - CheckBacklogForTimeouts(); + CheckBacklogsForTimeouts(); runThisTime = !isDisposed && Interlocked.CompareExchange(ref beating, 1, 0) == 0; if (!runThisTime) return; @@ -757,18 +804,33 @@ internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool PushToBacklog(Message message, bool onlyIfExists) + private bool PushToBacklog(Message message, bool onlyIfExists, bool isHandShake = false) { + // If we're unhealthy in middle of a handshake, queue behind so that we AUTH and such in order + // For anything not handshake, we handle as before going into the general queue + if (isHandShake) + { + // If this is the initial attempt, bail - we'll come back if we fail to write to the pipe. 
+ if (_backlogHandshake.IsEmpty & onlyIfExists) + { + return false; + } + + _backlogHandshake.Enqueue(message); + StartBacklogProcessor(); + return true; + } + // Note, for deciding emptyness for whether to push onlyIfExists, and start worker, // we only need care if WE are able to // see the queue when its empty. Not whether anyone else sees it as empty. // So strong synchronization is not required. - if (_backlog.IsEmpty & onlyIfExists) return false; + if (_backlogGeneral.IsEmpty & onlyIfExists) return false; - int count = _backlog.Count; + int count = _backlogGeneral.Count; message.SetBacklogState(count, physical); - _backlog.Enqueue(message); + _backlogGeneral.Enqueue(message); // The correct way to decide to start backlog process is not based on previously empty // but based on a) not empty now (we enqueued!) and b) no backlog processor already running. @@ -776,26 +838,28 @@ private bool PushToBacklog(Message message, bool onlyIfExists) StartBacklogProcessor(); return true; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private void StartBacklogProcessor() { if (Interlocked.CompareExchange(ref _backlogProcessorIsRunning, 1, 0) == 0) { - #if DEBUG _backlogProcessorRequestedTime = Environment.TickCount; #endif _backlogStatus = BacklogStatus.Activating; - // start the backlog processor; this is a bit unorthadox, as you would *expect* this to just + // Start the backlog processor; this is a bit unorthadox, as you would *expect* this to just // be Task.Run; that would work fine when healthy, but when we're falling on our face, it is // easy to get into a thread-pool-starvation "spiral of death" if we rely on the thread-pool // to unblock the thread-pool when there could be sync-over-async callers. 
Note that in reality, // the initial "enough" of the back-log processor is typically sync, which means that the thread // we start is actually useful, despite thinking "but that will just go async and back to the pool" - var thread = new Thread(s => ((PhysicalBridge)s).ProcessBacklogAsync().RedisFireAndForget()); - thread.IsBackground = true; // don't keep process alive (also: act like the thread-pool used to) - thread.Name = "redisbacklog"; // help anyone looking at thread-dumps + var thread = new Thread(s => ((PhysicalBridge)s).ProcessBacklogsAsync().RedisFireAndForget()) + { + IsBackground = true, // don't keep process alive (also: act like the thread-pool used to) + Name = "redisbacklog", // help anyone looking at thread-dumps + }; thread.Start(this); } } @@ -803,38 +867,47 @@ private void StartBacklogProcessor() private volatile int _backlogProcessorRequestedTime; #endif - private void CheckBacklogForTimeouts() // check the head of the backlog queue, consuming anything that looks dead + private void CheckBacklogsForTimeouts() // check the head of the backlog queue, consuming anything that looks dead { - var now = Environment.TickCount; - var timeout = TimeoutMilliseconds; - - // Because peeking at the backlog, checking message and then dequeueing, is not thread-safe, we do have to use - // a lock here, for mutual exclusion of backlog DEQUEUERS. Unfortunately. - // But we reduce contention by only locking if we see something that looks timed out. - while (_backlog.TryPeek(out Message message)) + // Check the head of the backlog queue, consuming anything that looks dead + void crawlQueue(ConcurrentQueue backlog) { - if (message.IsInternalCall) break; // don't stomp these (not that they should have the async timeout flag, but...) - if (!message.HasAsyncTimedOut(now, timeout, out var _)) break; // not a timeout - we can stop looking - lock (_backlog) - { - // peek again since we didn't have lock before... 
- // and rerun the exact same checks as above, note that it may be a different message now - if (!_backlog.TryPeek(out message)) break; - if (message.IsInternalCall) break; - if (!message.HasAsyncTimedOut(now, timeout, out var _)) break; + var now = Environment.TickCount; + var timeout = TimeoutMilliseconds; - if (!_backlog.TryDequeue(out var message2) || (message != message2)) // consume it for real + // Because peeking at the backlog, checking message and then dequeueing, is not thread-safe, we do have to use + // a lock here, for mutual exclusion of backlog DEQUEUERS. Unfortunately. + // But we reduce contention by only locking if we see something that looks timed out. + while (backlog.TryPeek(out Message message)) + { + if (message.IsInternalCall) break; // don't stomp these (not that they should have the async timeout flag, but...) + if (!message.HasAsyncTimedOut(now, timeout, out var _)) break; // not a timeout - we can stop looking + lock (backlog) { - throw new RedisException("Thread safety bug detected! A queue message disappeared while we had the backlog lock"); + // peek again since we didn't have lock before... + // and rerun the exact same checks as above, note that it may be a different message now + if (!backlog.TryPeek(out message)) break; + if (message.IsInternalCall) break; + if (!message.HasAsyncTimedOut(now, timeout, out var _)) break; + + if (!backlog.TryDequeue(out var message2) || (message != message2)) // consume it for real + { + throw new RedisException("Thread safety bug detected! A queue message disappeared while we had the backlog lock"); + } } - } - // Tell the message it has failed - // Note: Attempting to *avoid* reentrancy/deadlock issues by not holding the lock while completing messages. 
- var ex = Multiplexer.GetException(WriteResult.TimeoutBeforeWrite, message, ServerEndPoint); - message.SetExceptionAndComplete(ex, this); + // Tell the message it has failed + // Note: Attempting to *avoid* reentrancy/deadlock issues by not holding the lock while completing messages. + var ex = Multiplexer.GetException(WriteResult.TimeoutBeforeWrite, message, ServerEndPoint); + message.SetExceptionAndComplete(ex, this); + } } + + crawlQueue(_backlogHandshake); + crawlQueue(_backlogSpecificServer); + crawlQueue(_backlogGeneral); } + internal enum BacklogStatus : byte { Inactive, @@ -852,109 +925,43 @@ internal enum BacklogStatus : byte SettingIdle, Faulted, } + + internal enum Backlog : byte + { + None, + General, + SpecificServer, + Handshake, + } + + private volatile Backlog _activeBacklog; private volatile BacklogStatus _backlogStatus; - private async Task ProcessBacklogAsync() + private async Task ProcessBacklogsAsync() { - LockToken token = default; + _backlogStatus = BacklogStatus.Starting; try { -#if DEBUG - int tryToAcquireTime = Environment.TickCount; - var msToStartWorker = unchecked(tryToAcquireTime - _backlogProcessorRequestedTime); - int failureCount = 0; -#endif - _backlogStatus = BacklogStatus.Starting; - while (true) + if (!_backlogHandshake.IsEmpty) { - // check whether the backlog is empty *before* even trying to get the lock - if (_backlog.IsEmpty) return; // nothing to do - - // try and get the lock; if unsuccessful, retry - token = await _singleWriterMutex.TryWaitAsync().ConfigureAwait(false); - if (token.Success) break; // got the lock; now go do something with it - -#if DEBUG - failureCount++; -#endif + await ProcessBridgeBacklogAsync(_backlogHandshake, Backlog.Handshake); } - _backlogStatus = BacklogStatus.Started; -#if DEBUG - int acquiredTime = Environment.TickCount; - var msToGetLock = unchecked(acquiredTime - tryToAcquireTime); -#endif - - // so now we are the writer; write some things! 
- Message message; - var timeout = TimeoutMilliseconds; - while(true) + if (!_backlogSpecificServer.IsEmpty) { - _backlogStatus = BacklogStatus.CheckingForWork; - // We need to lock _backlog when dequeueing because of - // races with timeout processing logic - lock (_backlog) - { - if (!_backlog.TryDequeue(out message)) break; // all done - } - - try - { - _backlogStatus = BacklogStatus.CheckingForTimeout; - if (message.HasAsyncTimedOut(Environment.TickCount, timeout, out var _)) - { - _backlogStatus = BacklogStatus.RecordingTimeout; - var ex = Multiplexer.GetException(WriteResult.TimeoutBeforeWrite, message, ServerEndPoint); -#if DEBUG // additional tracking - ex.Data["Redis-BacklogStartDelay"] = msToStartWorker; - ex.Data["Redis-BacklogGetLockDelay"] = msToGetLock; - if (failureCount != 0) ex.Data["Redis-BacklogFailCount"] = failureCount; - if (_maxWriteTime >= 0) ex.Data["Redis-MaxWrite"] = _maxWriteTime.ToString() + "ms, " + _maxWriteCommand.ToString(); - var maxFlush = physical?.MaxFlushTime ?? -1; - if (maxFlush >= 0) ex.Data["Redis-MaxFlush"] = maxFlush.ToString() + "ms, " + (physical?.MaxFlushBytes ?? 
-1).ToString(); - if (_maxLockDuration >= 0) ex.Data["Redis-MaxLockDuration"] = _maxLockDuration; -#endif - message.SetExceptionAndComplete(ex, this); - } - else - { - _backlogStatus = BacklogStatus.WritingMessage; - var result = WriteMessageInsideLock(physical, message); - - if (result == WriteResult.Success) - { - _backlogStatus = BacklogStatus.Flushing; - result = await physical.FlushAsync(false).ConfigureAwait(false); - } - - _backlogStatus = BacklogStatus.MarkingInactive; - if (result != WriteResult.Success) - { - _backlogStatus = BacklogStatus.RecordingWriteFailure; - var ex = Multiplexer.GetException(result, message, ServerEndPoint); - HandleWriteException(message, ex); - } - } - } - catch (Exception ex) - { - _backlogStatus = BacklogStatus.RecordingFault; - HandleWriteException(message, ex); - } - finally - { - UnmarkActiveMessage(message); - } + await ProcessBridgeBacklogAsync(_backlogSpecificServer, Backlog.SpecificServer); + } + if (!_backlogGeneral.IsEmpty) + { + await ProcessBridgeBacklogAsync(_backlogGeneral, Backlog.General); // Needs handoff + // only handoff to another completely viable connection } - _backlogStatus = BacklogStatus.SettingIdle; - physical.SetIdle(); - _backlogStatus = BacklogStatus.Inactive; } catch { _backlogStatus = BacklogStatus.Faulted; } finally - { - token.Dispose(); + { + _activeBacklog = Backlog.None; // Do this in finally block, so that thread aborts can't convince us the backlog processor is running forever if (Interlocked.CompareExchange(ref _backlogProcessorIsRunning, 0, 1) != 1) @@ -964,7 +971,7 @@ private async Task ProcessBacklogAsync() // Now that nobody is processing the backlog, we should consider starting a new backlog processor // in case a new message came in after we ended this loop. 
- if (!_backlog.IsEmpty) + if (BacklogHasItems) { // Check for faults mainly to prevent unlimited tasks spawning in a fault scenario // - it isn't StackOverflowException due to the Task.Run() @@ -976,6 +983,102 @@ private async Task ProcessBacklogAsync() } } + private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog, Backlog handlingBacklog) + { + LockToken token = default; +#if DEBUG + int tryToAcquireTime = Environment.TickCount; + var msToStartWorker = unchecked(tryToAcquireTime - _backlogProcessorRequestedTime); + int failureCount = 0; +#endif + _activeBacklog = handlingBacklog; + _backlogStatus = BacklogStatus.Starting; + + while (true) + { + // check whether the backlog is empty *before* even trying to get the lock + if (backlog.IsEmpty) return; // nothing to do + + // try and get the lock; if unsuccessful, retry + token = await _singleWriterMutex.TryWaitAsync().ConfigureAwait(false); + if (token.Success) break; // got the lock; now go do something with it +#if DEBUG + failureCount++; +#endif + } + _backlogStatus = BacklogStatus.Started; + +#if DEBUG + int acquiredTime = Environment.TickCount; + var msToGetLock = unchecked(acquiredTime - tryToAcquireTime); +#endif + + // so now we are the writer; write some things! 
+ Message message; + var timeout = TimeoutMilliseconds; + while (true) + { + _backlogStatus = BacklogStatus.CheckingForWork; + // We need to lock _backlog when dequeueing because of + // races with timeout processing logic + lock (backlog) + { + if (!backlog.TryDequeue(out message)) break; // all done + } + + try + { + _backlogStatus = BacklogStatus.CheckingForTimeout; + if (message.HasAsyncTimedOut(Environment.TickCount, timeout, out var _)) + { + _backlogStatus = BacklogStatus.RecordingTimeout; + var ex = Multiplexer.GetException(WriteResult.TimeoutBeforeWrite, message, ServerEndPoint); +#if DEBUG // additional tracking + ex.Data["Redis-BacklogStartDelay"] = msToStartWorker; + ex.Data["Redis-BacklogGetLockDelay"] = msToGetLock; + if (failureCount != 0) ex.Data["Redis-BacklogFailCount"] = failureCount; + if (_maxWriteTime >= 0) ex.Data["Redis-MaxWrite"] = _maxWriteTime.ToString() + "ms, " + _maxWriteCommand.ToString(); + var maxFlush = physical?.MaxFlushTime ?? -1; + if (maxFlush >= 0) ex.Data["Redis-MaxFlush"] = maxFlush.ToString() + "ms, " + (physical?.MaxFlushBytes ?? 
-1).ToString(); + if (_maxLockDuration >= 0) ex.Data["Redis-MaxLockDuration"] = _maxLockDuration; +#endif + message.SetExceptionAndComplete(ex, this); + } + else + { + _backlogStatus = BacklogStatus.WritingMessage; + var result = WriteMessageInsideLock(physical, message); + + if (result == WriteResult.Success) + { + _backlogStatus = BacklogStatus.Flushing; + result = await physical.FlushAsync(false).ConfigureAwait(false); + } + + _backlogStatus = BacklogStatus.MarkingInactive; + if (result != WriteResult.Success) + { + _backlogStatus = BacklogStatus.RecordingWriteFailure; + var ex = Multiplexer.GetException(result, message, ServerEndPoint); + HandleWriteException(message, ex); + } + } + } + catch (Exception ex) + { + _backlogStatus = BacklogStatus.RecordingFault; + HandleWriteException(message, ex); + } + finally + { + UnmarkActiveMessage(message); + } + } + _backlogStatus = BacklogStatus.SettingIdle; + physical.SetIdle(); + _backlogStatus = BacklogStatus.Inactive; + } + private WriteResult TimedOutBeforeWrite(Message message) { message.Cancel(); @@ -989,7 +1092,8 @@ private WriteResult TimedOutBeforeWrite(Message message) /// /// The phsyical connection to write to. /// The message to be written. - internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnection physical, Message message) + /// Whether this message is part of the handshake process. + internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnection physical, Message message, bool isHandShake = false) { /* design decision/choice; the code works fine either way, but if this is * set to *true*, then when we can't take the writer-lock *right away*, @@ -1009,7 +1113,7 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect // AVOID REORDERING MESSAGES // Prefer to add it to the backlog if this thread can see that there might already be a message backlog. 
// We do this before attempting to take the writelock, because we won't actually write, we'll just let the backlog get processed in due course - if (PushToBacklog(message, onlyIfExists: true)) + if (PushToBacklog(message, onlyIfExists: true, isHandShake: isHandShake)) { return new ValueTask(WriteResult.Success); // queued counts as success } @@ -1027,7 +1131,7 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect { // we can't get it *instantaneously*; is there // perhaps a backlog and active backlog processor? - if (PushToBacklog(message, onlyIfExists: !ALWAYS_USE_BACKLOG_IF_CANNOT_GET_SYNC_LOCK)) + if (PushToBacklog(message, onlyIfExists: !ALWAYS_USE_BACKLOG_IF_CANNOT_GET_SYNC_LOCK, isHandShake: isHandShake)) return new ValueTask(WriteResult.Success); // queued counts as success // no backlog... try to wait with the timeout; diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index 70a1bdf4a..4f23596cf 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -768,6 +768,9 @@ internal string Summary() return sb.ToString(); } + /// + /// Write the message directly or queues in the handshake (priority) queue. 
+ /// internal ValueTask WriteDirectOrQueueFireAndForgetAsync(PhysicalConnection connection, Message message, ResultProcessor processor) { static async ValueTask Awaited(ValueTask l_result) => await l_result.ForAwait(); @@ -791,7 +794,7 @@ internal ValueTask WriteDirectOrQueueFireAndForgetAsync(PhysicalConnection co } else { - result = bridge.WriteMessageTakingWriteLockAsync(connection, message); + result = bridge.WriteMessageTakingWriteLockAsync(connection, message, isHandShake: true); } } diff --git a/src/StackExchange.Redis/ServerSelectionStrategy.cs b/src/StackExchange.Redis/ServerSelectionStrategy.cs index ac9e664ca..4ce30c334 100644 --- a/src/StackExchange.Redis/ServerSelectionStrategy.cs +++ b/src/StackExchange.Redis/ServerSelectionStrategy.cs @@ -93,7 +93,7 @@ private static unsafe int GetClusterSlot(in RedisKey key) } } - public ServerEndPoint Select(Message message) + public ServerEndPoint Select(Message message, bool allowDisconnected = false) { if (message == null) throw new ArgumentNullException(nameof(message)); int slot = NoSlot; @@ -107,13 +107,13 @@ public ServerEndPoint Select(Message message) if (slot == MultipleSlots) throw ExceptionFactory.MultiSlot(multiplexer.IncludeDetailInExceptions, message); break; } - return Select(slot, message.Command, message.Flags); + return Select(slot, message.Command, message.Flags, allowDisconnected); } - public ServerEndPoint Select(RedisCommand command, in RedisKey key, CommandFlags flags) + public ServerEndPoint Select(RedisCommand command, in RedisKey key, CommandFlags flags, bool allowDisconnected = false) { int slot = ServerType == ServerType.Cluster ? 
HashSlot(key) : NoSlot; - return Select(slot, command, flags); + return Select(slot, command, flags, allowDisconnected); } public bool TryResend(int hashSlot, Message message, EndPoint endpoint, bool isMoved) @@ -227,10 +227,8 @@ private static unsafe int IndexOf(byte* ptr, byte value, int start, int end) return -1; } - private ServerEndPoint Any(RedisCommand command, CommandFlags flags) - { - return multiplexer.AnyConnected(ServerType, (uint)Interlocked.Increment(ref anyStartOffset), command, flags); - } + private ServerEndPoint Any(RedisCommand command, CommandFlags flags, bool allowDisconnected) => + multiplexer.AnyServer(ServerType, (uint)Interlocked.Increment(ref anyStartOffset), command, flags, allowDisconnected); private static ServerEndPoint FindMaster(ServerEndPoint endpoint, RedisCommand command) { @@ -273,12 +271,12 @@ private ServerEndPoint[] MapForMutation() return arr; } - private ServerEndPoint Select(int slot, RedisCommand command, CommandFlags flags) + private ServerEndPoint Select(int slot, RedisCommand command, CommandFlags flags, bool allowDisconnected) { flags = Message.GetMasterReplicaFlags(flags); // only intersted in master/replica preferences ServerEndPoint[] arr; - if (slot == NoSlot || (arr = map) == null) return Any(command, flags); + if (slot == NoSlot || (arr = map) == null) return Any(command, flags, allowDisconnected); ServerEndPoint endpoint = arr[slot], testing; // but: ^^^ is the MASTER slots; if we want a replica, we need to do some thinking @@ -288,21 +286,21 @@ private ServerEndPoint Select(int slot, RedisCommand command, CommandFlags flags switch (flags) { case CommandFlags.DemandReplica: - return FindReplica(endpoint, command) ?? Any(command, flags); + return FindReplica(endpoint, command) ?? Any(command, flags, allowDisconnected); case CommandFlags.PreferReplica: testing = FindReplica(endpoint, command); if (testing != null) return testing; break; case CommandFlags.DemandMaster: - return FindMaster(endpoint, command) ?? 
Any(command, flags); + return FindMaster(endpoint, command) ?? Any(command, flags, allowDisconnected); case CommandFlags.PreferMaster: testing = FindMaster(endpoint, command); if (testing != null) return testing; break; } - if (endpoint.IsSelectable(command)) return endpoint; + if (endpoint.IsSelectable(command, allowDisconnected)) return endpoint; } - return Any(command, flags); + return Any(command, flags, allowDisconnected); } } } diff --git a/tests/StackExchange.Redis.Tests/AsyncTests.cs b/tests/StackExchange.Redis.Tests/AsyncTests.cs index 5ee26f815..aefa52130 100644 --- a/tests/StackExchange.Redis.Tests/AsyncTests.cs +++ b/tests/StackExchange.Redis.Tests/AsyncTests.cs @@ -19,7 +19,7 @@ public void AsyncTasksReportFailureIfServerUnavailable() { SetExpectedAmbientFailureCount(-1); // this will get messy - using (var conn = Create(allowAdmin: true)) + using (var conn = Create(allowAdmin: true, backlogPolicy: BacklogPolicy.FailFast)) { var server = conn.GetServer(TestConfig.Current.MasterServer, TestConfig.Current.MasterPort); diff --git a/tests/StackExchange.Redis.Tests/BacklogTests.cs b/tests/StackExchange.Redis.Tests/BacklogTests.cs new file mode 100644 index 000000000..3c8268e44 --- /dev/null +++ b/tests/StackExchange.Redis.Tests/BacklogTests.cs @@ -0,0 +1,55 @@ +using System; +using System.Threading; +using System.Threading.Tasks; +using Xunit; +using Xunit.Abstractions; + +namespace StackExchange.Redis.Tests +{ + public class BacklogTests : TestBase + { + public BacklogTests(ITestOutputHelper output) : base (output) { } + + protected override string GetConfiguration() => TestConfig.Current.MasterServerAndPort + "," + TestConfig.Current.ReplicaServerAndPort; + + [Fact] + public async Task BasicTest() + { + try + { + using (var muxer = Create(keepAlive: 1, connectTimeout: 10000, allowAdmin: true, shared: false)) + { + var conn = muxer.GetDatabase(); + conn.Ping(); + + var server = muxer.GetServer(muxer.GetEndPoints()[0]); + var server2 = 
muxer.GetServer(muxer.GetEndPoints()[1]); + + muxer.AllowConnect = false; + + // muxer.IsConnected is true of *any* are connected, simulate failure for all cases. + server.SimulateConnectionFailure(SimulatedFailureType.All); + Assert.False(server.IsConnected); + Assert.True(server2.IsConnected); + Assert.True(muxer.IsConnected); + + server2.SimulateConnectionFailure(SimulatedFailureType.All); + Assert.False(server.IsConnected); + Assert.False(server2.IsConnected); + Assert.False(muxer.IsConnected); + + // should reconnect within 1 keepalive interval + muxer.AllowConnect = true; + Log("Waiting for reconnect"); + await UntilCondition(TimeSpan.FromSeconds(2), () => muxer.IsConnected).ForAwait(); + + Assert.True(muxer.IsConnected); + } + } + finally + { + ClearAmbientFailures(); + } + } + } +} diff --git a/tests/StackExchange.Redis.Tests/ConnectFailTimeout.cs b/tests/StackExchange.Redis.Tests/ConnectFailTimeout.cs index c52082d12..02b796997 100644 --- a/tests/StackExchange.Redis.Tests/ConnectFailTimeout.cs +++ b/tests/StackExchange.Redis.Tests/ConnectFailTimeout.cs @@ -13,7 +13,7 @@ public ConnectFailTimeout(ITestOutputHelper output) : base (output) { } public async Task NoticesConnectFail() { SetExpectedAmbientFailureCount(-1); - using (var conn = Create(allowAdmin: true)) + using (var conn = Create(allowAdmin: true, backlogPolicy: BacklogPolicy.FailFast)) { var server = conn.GetServer(conn.GetEndPoints()[0]); diff --git a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs index d1020e6e3..8b80fc56f 100644 --- a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs +++ b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs @@ -99,8 +99,10 @@ public async Task Issue922_ReconnectRaised() config.AbortOnConnectFail = true; config.KeepAlive = 10; config.SyncTimeout = 1000; + config.AsyncTimeout = 1000; config.ReconnectRetryPolicy = new ExponentialRetry(5000); config.AllowAdmin = true; + 
config.BacklogPolicy = BacklogPolicy.FailFast; int failCount = 0, restoreCount = 0; diff --git a/tests/StackExchange.Redis.Tests/TestBase.cs b/tests/StackExchange.Redis.Tests/TestBase.cs index c31a72178..2928c894e 100644 --- a/tests/StackExchange.Redis.Tests/TestBase.cs +++ b/tests/StackExchange.Redis.Tests/TestBase.cs @@ -230,6 +230,7 @@ internal virtual IInternalConnectionMultiplexer Create( string channelPrefix = null, Proxy? proxy = null, string configuration = null, bool logTransactionData = true, bool shared = true, int? defaultDatabase = null, + BacklogPolicy backlogPolicy = null, [CallerMemberName] string caller = null) { if (Output == null) @@ -238,7 +239,7 @@ internal virtual IInternalConnectionMultiplexer Create( } if (shared && _fixture != null && _fixture.IsEnabled && enabledCommands == null && disabledCommands == null && fail && channelPrefix == null && proxy == null - && configuration == null && password == null && tieBreaker == null && defaultDatabase == null && (allowAdmin == null || allowAdmin == true) && expectedFailCount == 0) + && configuration == null && password == null && tieBreaker == null && defaultDatabase == null && (allowAdmin == null || allowAdmin == true) && expectedFailCount == 0 && backlogPolicy == null) { configuration = GetConfiguration(); if (configuration == _fixture.Configuration) @@ -255,7 +256,9 @@ internal virtual IInternalConnectionMultiplexer Create( checkConnect, failMessage, channelPrefix, proxy, configuration ?? GetConfiguration(), - logTransactionData, defaultDatabase, caller); + logTransactionData, defaultDatabase, + backlogPolicy, + caller); muxer.InternalError += OnInternalError; muxer.ConnectionFailed += OnConnectionFailed; return muxer; @@ -270,7 +273,7 @@ public static ConnectionMultiplexer CreateDefault( string channelPrefix = null, Proxy? proxy = null, string configuration = null, bool logTransactionData = true, int? 
defaultDatabase = null, - + BacklogPolicy backlogPolicy = null, [CallerMemberName] string caller = null) { StringWriter localLog = null; @@ -306,6 +309,7 @@ public static ConnectionMultiplexer CreateDefault( if (connectTimeout != null) config.ConnectTimeout = connectTimeout.Value; if (proxy != null) config.Proxy = proxy.Value; if (defaultDatabase != null) config.DefaultDatabase = defaultDatabase.Value; + if (backlogPolicy != null) config.BacklogPolicy = backlogPolicy; var watch = Stopwatch.StartNew(); var task = ConnectionMultiplexer.ConnectAsync(config, log); if (!task.Wait(config.ConnectTimeout >= (int.MaxValue / 2) ? int.MaxValue : config.ConnectTimeout * 2)) From f69231297f0cba0c24819fb1e7b764c3432e999c Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 26 Oct 2021 21:39:28 -0400 Subject: [PATCH 008/117] Woops, good catch! --- src/StackExchange.Redis/ConnectionMultiplexer.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index 0c542b893..85803df2c 100644 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -1781,7 +1781,6 @@ internal async Task ReconfigureAsync(bool first, bool reconfigureAll, LogP var task = available[i]; var bs = server.GetBridgeStatus(RedisCommand.PING); - //out int inst, out int qs, out long @in, out int qu, out bool aw, out long toRead, out long toWrite, out var bs, out var rs, out var ws); log?.WriteLine($" Server[{i}] ({Format.ToString(server)}) Status: {task.Status} (inst: {bs.MessagesSinceLastHeartbeat}, qs: {bs.Connection.MessagesSentAwaitingResponse}, in: {bs.Connection.BytesAvailableOnSocket}, qu: {bs.MessagesSinceLastHeartbeat}, aw: {bs.IsWriterActive}, in-pipe: {bs.Connection.BytesInReadPipe}, out-pipe: {bs.Connection.BytesInWritePipe}, bw: {bs.BacklogStatus}, rs: {bs.Connection.ReadStatus}. 
ws: {bs.Connection.WriteStatus})"); } } From 4b5a410987055b02726c5c5940420ade485fffac Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 26 Oct 2021 22:18:05 -0400 Subject: [PATCH 009/117] Fix reader/writer states on catastrophic stat failure. default subtly differs here in that the reader/writer states will default to int/byte 0 which is not the same as NA. Perhaps we should just make NA be the 0 state though, which would simplify all use cases... @mgravell @philon-msft thoughts there? Talking PhysicalConnection Reader/Writer status enums. --- src/StackExchange.Redis/PhysicalBridge.cs | 2 +- src/StackExchange.Redis/ServerEndPoint.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 115e3c275..22260abee 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -302,7 +302,7 @@ internal readonly struct BridgeStatus public BacklogStatus BacklogStatus { get; init; } /// - /// Status foor the underlying . + /// Status for the underlying . /// public PhysicalConnection.ConnectionStatus Connection { get; init; } diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index 70a1bdf4a..a76f5ca3e 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -445,7 +445,7 @@ internal BridgeStatus GetBridgeStatus(RedisCommand command) System.Diagnostics.Debug.WriteLine(ex.Message); } - return default; + return BridgeStatus.Zero; } internal string GetProfile() From 75545f07d4870994b3284941c12caeb3281d0d63 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Wed, 27 Oct 2021 12:31:19 -0400 Subject: [PATCH 010/117] Fix tests (token disposal oops) and tweak for speed The failure tests need not retry loop n times. Also: - Removes the handshake queue, relying on the pipe directly instead. 
- Formats TestBase for easier maintenance --- src/StackExchange.Redis/PhysicalBridge.cs | 225 +++++++++--------- src/StackExchange.Redis/ServerEndPoint.cs | 7 +- .../ConnectToUnexistingHost.cs | 8 +- .../ExceptionFactoryTests.cs | 6 +- tests/StackExchange.Redis.Tests/Secure.cs | 1 + tests/StackExchange.Redis.Tests/TestBase.cs | 69 ++++-- 6 files changed, 176 insertions(+), 140 deletions(-) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 476aac8f1..7f17795e2 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -29,9 +29,8 @@ internal sealed class PhysicalBridge : IDisposable // We have 3 queues in play on this bridge, and things enter them in this order: // General: for anything coming into the bridge. Everything but handshake commands goes into this queue. // SpecificServer: for anything targeting this endpoint that cannot be handed off to another endpoint. - // Handshake: foor anything coming from our own handshake // The queue priority order is reverse: - // 1. Handshake messages are sent first to re-establish the connection (e.g. AUTH) + // 1. Handshake messages are sent directly to the pipe itself to re-establish the connection (e.g. AUTH) // 2. Specific server messages are sent next (e.g. REPLICAOF - this queue is rare to start with) // 3. All other messages are handled // Note: this doesn't mean sent - if we have another viable endpoint and these messages can be sent on it, @@ -39,9 +38,8 @@ internal sealed class PhysicalBridge : IDisposable // specific server queue so that we unblock the general FIFO queue for other handoffs. 
// private readonly ConcurrentQueue _backlogGeneral = new(), - _backlogSpecificServer = new(), - _backlogHandshake = new(); - private bool BacklogHasItems => !_backlogGeneral.IsEmpty || !_backlogSpecificServer.IsEmpty || !_backlogHandshake.IsEmpty; + _backlogSpecificServer = new(); + private bool BacklogHasItems => !_backlogGeneral.IsEmpty || !_backlogSpecificServer.IsEmpty; private int _backlogProcessorIsRunning = 0; private int activeWriters = 0; @@ -185,7 +183,7 @@ public WriteResult TryWriteSync(Message message, bool isReplica) return result; } - public ValueTask TryWriteAsync(Message message, bool isReplica) + public ValueTask TryWriteAsync(Message message, bool isReplica, bool isHandshake = false) { if (isDisposed) throw new ObjectDisposedException(Name); if (!IsConnected) return new ValueTask(QueueOrFailMessage(message)); @@ -193,7 +191,7 @@ public ValueTask TryWriteAsync(Message message, bool isReplica) var physical = this.physical; if (physical == null) return new ValueTask(FailDueToNoConnection(message)); - var result = WriteMessageTakingWriteLockAsync(physical, message); + var result = WriteMessageTakingWriteLockAsync(physical, message, isHandshake); LogNonPreferred(message.Flags, isReplica); return result; } @@ -310,7 +308,7 @@ internal readonly struct BridgeStatus /// /// Total number of backlog messages that are in the retry backlog. /// - public int BacklogMessagesPending => BacklogMessagesPendingGeneral + BacklogMessagesPendingSpecificServer + BacklogMessagesPendingHandshake; + public int BacklogMessagesPending => BacklogMessagesPendingGeneral + BacklogMessagesPendingSpecificServer; /// /// The number of backlog messages that are in the retry queue. @@ -320,10 +318,6 @@ internal readonly struct BridgeStatus /// The number of backlog messages that are in the retry queue. /// public int BacklogMessagesPendingSpecificServer { get; init; } - /// - /// The number of backlog messages that are in the retry queue. 
- /// - public int BacklogMessagesPendingHandshake { get; init; } /// /// Status of the currently processing backlog, if any. @@ -351,7 +345,6 @@ internal readonly struct BridgeStatus IsWriterActive = !_singleWriterMutex.IsAvailable, BacklogMessagesPendingGeneral = _backlogGeneral.Count, BacklogMessagesPendingSpecificServer = _backlogSpecificServer.Count, - BacklogMessagesPendingHandshake = _backlogHandshake.Count, BacklogStatus = _backlogStatus, ActiveBacklog = _activeBacklog, Connection = physical?.GetStatus() ?? PhysicalConnection.ConnectionStatus.Default, @@ -762,7 +755,7 @@ internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical // AVOID REORDERING MESSAGES // Prefer to add it to the backlog if this thread can see that there might already be a message backlog. // We do this before attempting to take the writelock, because we won't actually write, we'll just let the backlog get processed in due course - if (PushToBacklog(message, onlyIfExists: true)) + if (TryPushToBacklog(message, onlyIfExists: true)) { return WriteResult.Success; // queued counts as success } @@ -775,7 +768,7 @@ internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical { // we can't get it *instantaneously*; is there // perhaps a backlog and active backlog processor? - if (PushToBacklog(message, onlyIfExists: true)) return WriteResult.Success; // queued counts as success + if (TryPushToBacklog(message, onlyIfExists: true)) return WriteResult.Success; // queued counts as success // no backlog... 
try to wait with the timeout; // if we *still* can't get it: that counts as @@ -804,21 +797,19 @@ internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool PushToBacklog(Message message, bool onlyIfExists, bool isHandShake = false) + private bool TryPushToBacklog(Message message, bool onlyIfExists, bool isHandshake = false) { // If we're unhealthy in middle of a handshake, queue behind so that we AUTH and such in order // For anything not handshake, we handle as before going into the general queue - if (isHandShake) + // Internal calls also aren't queued here because ordering does no matter for them + // Get them on the wire ASAP or fail internally (which doesn't alert the user) to complete ASAP + if (isHandshake || message.IsInternalCall) { - // If this is the initial attempt, bail - we'll come back if we fail to write to the pipe. - if (_backlogHandshake.IsEmpty & onlyIfExists) - { - return false; - } - - _backlogHandshake.Enqueue(message); - StartBacklogProcessor(); - return true; + // TODO: Discussion. + // For handshake commands we did have a backlog in mind here, but if they bypass directly to the pipe + // It's effectively a queue already, or our handshake failed and we're restarting anyhow + // So perhaps, we simply do not need this concurrent queue at all. + return false; } // Note, for deciding emptyness for whether to push onlyIfExists, and start worker, @@ -827,7 +818,7 @@ private bool PushToBacklog(Message message, bool onlyIfExists, bool isHandShake // So strong synchronization is not required. 
if (_backlogGeneral.IsEmpty & onlyIfExists) return false; - + int count = _backlogGeneral.Count; message.SetBacklogState(count, physical); _backlogGeneral.Enqueue(message); @@ -857,8 +848,8 @@ private void StartBacklogProcessor() // we start is actually useful, despite thinking "but that will just go async and back to the pool" var thread = new Thread(s => ((PhysicalBridge)s).ProcessBacklogsAsync().RedisFireAndForget()) { - IsBackground = true, // don't keep process alive (also: act like the thread-pool used to) - Name = "redisbacklog", // help anyone looking at thread-dumps + IsBackground = true, // don't keep process alive (also: act like the thread-pool used to) + Name = "StackExchange.Redis Backlog", // help anyone looking at thread-dumps }; thread.Start(this); } @@ -870,17 +861,17 @@ private void StartBacklogProcessor() private void CheckBacklogsForTimeouts() // check the head of the backlog queue, consuming anything that looks dead { // Check the head of the backlog queue, consuming anything that looks dead - void crawlQueue(ConcurrentQueue backlog) + void crawlQueue(ConcurrentQueue backlog, int timeout) { var now = Environment.TickCount; - var timeout = TimeoutMilliseconds; // Because peeking at the backlog, checking message and then dequeueing, is not thread-safe, we do have to use // a lock here, for mutual exclusion of backlog DEQUEUERS. Unfortunately. // But we reduce contention by only locking if we see something that looks timed out. while (backlog.TryPeek(out Message message)) { - if (message.IsInternalCall) break; // don't stomp these (not that they should have the async timeout flag, but...) + // don't stomp these (not that they should have the async timeout flag, but...) 
+ if (message.IsInternalCall) break; if (!message.HasAsyncTimedOut(now, timeout, out var _)) break; // not a timeout - we can stop looking lock (backlog) { @@ -903,9 +894,8 @@ void crawlQueue(ConcurrentQueue backlog) } } - crawlQueue(_backlogHandshake); - crawlQueue(_backlogSpecificServer); - crawlQueue(_backlogGeneral); + crawlQueue(_backlogSpecificServer, TimeoutMilliseconds); + crawlQueue(_backlogGeneral, TimeoutMilliseconds); } internal enum BacklogStatus : byte @@ -931,7 +921,6 @@ internal enum Backlog : byte None, General, SpecificServer, - Handshake, } private volatile Backlog _activeBacklog; @@ -941,10 +930,6 @@ private async Task ProcessBacklogsAsync() _backlogStatus = BacklogStatus.Starting; try { - if (!_backlogHandshake.IsEmpty) - { - await ProcessBridgeBacklogAsync(_backlogHandshake, Backlog.Handshake); - } if (!_backlogSpecificServer.IsEmpty) { await ProcessBridgeBacklogAsync(_backlogSpecificServer, Backlog.SpecificServer); @@ -974,7 +959,7 @@ private async Task ProcessBacklogsAsync() if (BacklogHasItems) { // Check for faults mainly to prevent unlimited tasks spawning in a fault scenario - // - it isn't StackOverflowException due to the Task.Run() + // This won't cause a StackOverflowException due to the Task.Run() handoff if (_backlogStatus != BacklogStatus.Faulted) { StartBacklogProcessor(); @@ -986,97 +971,104 @@ private async Task ProcessBacklogsAsync() private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog, Backlog handlingBacklog) { LockToken token = default; + try + { #if DEBUG - int tryToAcquireTime = Environment.TickCount; - var msToStartWorker = unchecked(tryToAcquireTime - _backlogProcessorRequestedTime); - int failureCount = 0; + int tryToAcquireTime = Environment.TickCount; + var msToStartWorker = unchecked(tryToAcquireTime - _backlogProcessorRequestedTime); + int failureCount = 0; #endif - _activeBacklog = handlingBacklog; - _backlogStatus = BacklogStatus.Starting; + _activeBacklog = handlingBacklog; + _backlogStatus = 
BacklogStatus.Starting; - while (true) - { - // check whether the backlog is empty *before* even trying to get the lock - if (backlog.IsEmpty) return; // nothing to do + while (true) + { + // check whether the backlog is empty *before* even trying to get the lock + if (backlog.IsEmpty) return; // nothing to do - // try and get the lock; if unsuccessful, retry - token = await _singleWriterMutex.TryWaitAsync().ConfigureAwait(false); - if (token.Success) break; // got the lock; now go do something with it + // try and get the lock; if unsuccessful, retry + token = await _singleWriterMutex.TryWaitAsync().ConfigureAwait(false); + if (token.Success) break; // got the lock; now go do something with it #if DEBUG - failureCount++; + failureCount++; #endif - } - _backlogStatus = BacklogStatus.Started; + } + _backlogStatus = BacklogStatus.Started; #if DEBUG - int acquiredTime = Environment.TickCount; - var msToGetLock = unchecked(acquiredTime - tryToAcquireTime); + int acquiredTime = Environment.TickCount; + var msToGetLock = unchecked(acquiredTime - tryToAcquireTime); #endif - // so now we are the writer; write some things! - Message message; - var timeout = TimeoutMilliseconds; - while (true) - { - _backlogStatus = BacklogStatus.CheckingForWork; - // We need to lock _backlog when dequeueing because of - // races with timeout processing logic - lock (backlog) - { - if (!backlog.TryDequeue(out message)) break; // all done - } - - try + // so now we are the writer; write some things! 
+ Message message; + var timeout = TimeoutMilliseconds; + while (true) { - _backlogStatus = BacklogStatus.CheckingForTimeout; - if (message.HasAsyncTimedOut(Environment.TickCount, timeout, out var _)) + _backlogStatus = BacklogStatus.CheckingForWork; + // We need to lock _backlog when dequeueing because of + // races with timeout processing logic + lock (backlog) { - _backlogStatus = BacklogStatus.RecordingTimeout; - var ex = Multiplexer.GetException(WriteResult.TimeoutBeforeWrite, message, ServerEndPoint); -#if DEBUG // additional tracking - ex.Data["Redis-BacklogStartDelay"] = msToStartWorker; - ex.Data["Redis-BacklogGetLockDelay"] = msToGetLock; - if (failureCount != 0) ex.Data["Redis-BacklogFailCount"] = failureCount; - if (_maxWriteTime >= 0) ex.Data["Redis-MaxWrite"] = _maxWriteTime.ToString() + "ms, " + _maxWriteCommand.ToString(); - var maxFlush = physical?.MaxFlushTime ?? -1; - if (maxFlush >= 0) ex.Data["Redis-MaxFlush"] = maxFlush.ToString() + "ms, " + (physical?.MaxFlushBytes ?? 
-1).ToString(); - if (_maxLockDuration >= 0) ex.Data["Redis-MaxLockDuration"] = _maxLockDuration; -#endif - message.SetExceptionAndComplete(ex, this); + if (!backlog.TryDequeue(out message)) break; // all done } - else - { - _backlogStatus = BacklogStatus.WritingMessage; - var result = WriteMessageInsideLock(physical, message); - if (result == WriteResult.Success) + try + { + _backlogStatus = BacklogStatus.CheckingForTimeout; + if (message.HasAsyncTimedOut(Environment.TickCount, timeout, out var _)) { - _backlogStatus = BacklogStatus.Flushing; - result = await physical.FlushAsync(false).ConfigureAwait(false); + _backlogStatus = BacklogStatus.RecordingTimeout; + var ex = Multiplexer.GetException(WriteResult.TimeoutBeforeWrite, message, ServerEndPoint); +#if DEBUG // additional tracking + ex.Data["Redis-BacklogStartDelay"] = msToStartWorker; + ex.Data["Redis-BacklogGetLockDelay"] = msToGetLock; + if (failureCount != 0) ex.Data["Redis-BacklogFailCount"] = failureCount; + if (_maxWriteTime >= 0) ex.Data["Redis-MaxWrite"] = _maxWriteTime.ToString() + "ms, " + _maxWriteCommand.ToString(); + var maxFlush = physical?.MaxFlushTime ?? -1; + if (maxFlush >= 0) ex.Data["Redis-MaxFlush"] = maxFlush.ToString() + "ms, " + (physical?.MaxFlushBytes ?? 
-1).ToString(); + if (_maxLockDuration >= 0) ex.Data["Redis-MaxLockDuration"] = _maxLockDuration; +#endif + message.SetExceptionAndComplete(ex, this); } - - _backlogStatus = BacklogStatus.MarkingInactive; - if (result != WriteResult.Success) + else { - _backlogStatus = BacklogStatus.RecordingWriteFailure; - var ex = Multiplexer.GetException(result, message, ServerEndPoint); - HandleWriteException(message, ex); + _backlogStatus = BacklogStatus.WritingMessage; + var result = WriteMessageInsideLock(physical, message); + + if (result == WriteResult.Success) + { + _backlogStatus = BacklogStatus.Flushing; + result = await physical.FlushAsync(false).ConfigureAwait(false); + } + + _backlogStatus = BacklogStatus.MarkingInactive; + if (result != WriteResult.Success) + { + _backlogStatus = BacklogStatus.RecordingWriteFailure; + var ex = Multiplexer.GetException(result, message, ServerEndPoint); + HandleWriteException(message, ex); + } } } + catch (Exception ex) + { + _backlogStatus = BacklogStatus.RecordingFault; + HandleWriteException(message, ex); + } + finally + { + UnmarkActiveMessage(message); + } } - catch (Exception ex) - { - _backlogStatus = BacklogStatus.RecordingFault; - HandleWriteException(message, ex); - } - finally - { - UnmarkActiveMessage(message); - } + _backlogStatus = BacklogStatus.SettingIdle; + physical.SetIdle(); + _backlogStatus = BacklogStatus.Inactive; + } + finally + { + token.Dispose(); } - _backlogStatus = BacklogStatus.SettingIdle; - physical.SetIdle(); - _backlogStatus = BacklogStatus.Inactive; } private WriteResult TimedOutBeforeWrite(Message message) @@ -1092,8 +1084,8 @@ private WriteResult TimedOutBeforeWrite(Message message) /// /// The phsyical connection to write to. /// The message to be written. - /// Whether this message is part of the handshake process. 
- internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnection physical, Message message, bool isHandShake = false) + /// Whether this message is part of the handshake process. + internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnection physical, Message message, bool isHandshake = false) { /* design decision/choice; the code works fine either way, but if this is * set to *true*, then when we can't take the writer-lock *right away*, @@ -1113,7 +1105,7 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect // AVOID REORDERING MESSAGES // Prefer to add it to the backlog if this thread can see that there might already be a message backlog. // We do this before attempting to take the writelock, because we won't actually write, we'll just let the backlog get processed in due course - if (PushToBacklog(message, onlyIfExists: true, isHandShake: isHandShake)) + if (TryPushToBacklog(message, onlyIfExists: true, isHandshake: isHandshake)) { return new ValueTask(WriteResult.Success); // queued counts as success } @@ -1131,7 +1123,7 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect { // we can't get it *instantaneously*; is there // perhaps a backlog and active backlog processor? - if (PushToBacklog(message, onlyIfExists: !ALWAYS_USE_BACKLOG_IF_CANNOT_GET_SYNC_LOCK, isHandShake: isHandShake)) + if (TryPushToBacklog(message, onlyIfExists: !ALWAYS_USE_BACKLOG_IF_CANNOT_GET_SYNC_LOCK, isHandshake: isHandshake)) return new ValueTask(WriteResult.Success); // queued counts as success // no backlog... 
try to wait with the timeout; @@ -1180,6 +1172,7 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect } } } + #if DEBUG private void RecordLockDuration(int lockTaken) { @@ -1190,7 +1183,7 @@ private void RecordLockDuration(int lockTaken) volatile int _maxLockDuration = -1; #endif - private async ValueTask WriteMessageTakingWriteLockAsync_Awaited(ValueTask pending, PhysicalConnection physical, Message message) + private async ValueTask WriteMessageTakingWriteLockAsync_Awaited(ValueTask pending, PhysicalConnection physical, Message message) { try { diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index 4a3c0bfa1..689985cd9 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -519,6 +519,9 @@ internal Message GetTracerMessage(bool assertIdentity) internal bool IsSelectable(RedisCommand command, bool allowDisconnected = false) { var bridge = unselectableReasons == 0 ? GetBridge(command, false) : null; + var bridge = unselectableReasons == 0 || (allowDisconnected && unselectableReasons == UnselectableFlags.DidNotRespond) + ? 
GetBridge(command, false) + : null; return bridge != null && (allowDisconnected || bridge.IsConnected); } @@ -782,7 +785,7 @@ internal ValueTask WriteDirectOrQueueFireAndForgetAsync(PhysicalConnection co if (connection == null) { Multiplexer.Trace($"{Format.ToString(this)}: Enqueue (async): " + message); - result = GetBridge(message.Command).TryWriteAsync(message, isReplica); + result = GetBridge(message.Command).TryWriteAsync(message, isReplica, isHandshake: true); } else { @@ -794,7 +797,7 @@ internal ValueTask WriteDirectOrQueueFireAndForgetAsync(PhysicalConnection co } else { - result = bridge.WriteMessageTakingWriteLockAsync(connection, message, isHandShake: true); + result = bridge.WriteMessageTakingWriteLockAsync(connection, message, isHandshake: true); } } diff --git a/tests/StackExchange.Redis.Tests/ConnectToUnexistingHost.cs b/tests/StackExchange.Redis.Tests/ConnectToUnexistingHost.cs index e2c454a5c..03757d918 100644 --- a/tests/StackExchange.Redis.Tests/ConnectToUnexistingHost.cs +++ b/tests/StackExchange.Redis.Tests/ConnectToUnexistingHost.cs @@ -48,7 +48,7 @@ void innerScenario() { var ex = Assert.Throws(() => { - using (ConnectionMultiplexer.Connect(TestConfig.Current.MasterServer + ":6500,connectTimeout=1000", Writer)) { } + using (ConnectionMultiplexer.Connect(TestConfig.Current.MasterServer + ":6500,connectTimeout=1000,connectRetry=0", Writer)) { } }); Log(ex.ToString()); } @@ -59,7 +59,7 @@ public async Task CanNotOpenNonsenseConnection_DNS() { var ex = await Assert.ThrowsAsync(async () => { - using (await ConnectionMultiplexer.ConnectAsync($"doesnot.exist.ds.{Guid.NewGuid():N}.com:6500,connectTimeout=1000", Writer).ForAwait()) { } + using (await ConnectionMultiplexer.ConnectAsync($"doesnot.exist.ds.{Guid.NewGuid():N}.com:6500,connectTimeout=1000,connectRetry=0", Writer).ForAwait()) { } }).ForAwait(); Log(ex.ToString()); } @@ -70,7 +70,7 @@ public async Task CreateDisconnectedNonsenseConnection_IP() await 
RunBlockingSynchronousWithExtraThreadAsync(innerScenario).ForAwait(); void innerScenario() { - using (var conn = ConnectionMultiplexer.Connect(TestConfig.Current.MasterServer + ":6500,abortConnect=false,connectTimeout=1000", Writer)) + using (var conn = ConnectionMultiplexer.Connect(TestConfig.Current.MasterServer + ":6500,abortConnect=false,connectTimeout=1000,connectRetry=0", Writer)) { Assert.False(conn.GetServer(conn.GetEndPoints().Single()).IsConnected); Assert.False(conn.GetDatabase().IsConnected(default(RedisKey))); @@ -84,7 +84,7 @@ public async Task CreateDisconnectedNonsenseConnection_DNS() await RunBlockingSynchronousWithExtraThreadAsync(innerScenario).ForAwait(); void innerScenario() { - using (var conn = ConnectionMultiplexer.Connect($"doesnot.exist.ds.{Guid.NewGuid():N}.com:6500,abortConnect=false,connectTimeout=1000", Writer)) + using (var conn = ConnectionMultiplexer.Connect($"doesnot.exist.ds.{Guid.NewGuid():N}.com:6500,abortConnect=false,connectTimeout=1000,connectRetry=0", Writer)) { Assert.False(conn.GetServer(conn.GetEndPoints().Single()).IsConnected); Assert.False(conn.GetDatabase().IsConnected(default(RedisKey))); diff --git a/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs b/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs index 25606a8c2..a22e3a133 100644 --- a/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs +++ b/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs @@ -151,6 +151,8 @@ public void NoConnectionException(bool abortOnConnect, int connCount, int comple var options = new ConfigurationOptions() { AbortOnConnectFail = abortOnConnect, + BacklogPolicy = BacklogPolicy.FailFast, + ConnectRetry = 1, ConnectTimeout = 500, SyncTimeout = 500, KeepAlive = 5000 @@ -160,12 +162,12 @@ public void NoConnectionException(bool abortOnConnect, int connCount, int comple if (abortOnConnect) { options.EndPoints.Add(TestConfig.Current.MasterServerAndPort); - muxer = ConnectionMultiplexer.Connect(options); + muxer = 
ConnectionMultiplexer.Connect(options, Writer); } else { options.EndPoints.Add($"doesnot.exist.{Guid.NewGuid():N}:6379"); - muxer = ConnectionMultiplexer.Connect(options); + muxer = ConnectionMultiplexer.Connect(options, Writer); } using (muxer) diff --git a/tests/StackExchange.Redis.Tests/Secure.cs b/tests/StackExchange.Redis.Tests/Secure.cs index 2e7d70929..79cba81b8 100644 --- a/tests/StackExchange.Redis.Tests/Secure.cs +++ b/tests/StackExchange.Redis.Tests/Secure.cs @@ -65,6 +65,7 @@ public async Task ConnectWithWrongPassword(string password) var config = ConfigurationOptions.Parse(GetConfiguration()); config.Password = password; config.ConnectRetry = 0; // we don't want to retry on closed sockets in this case. + config.BacklogPolicy = BacklogPolicy.FailFast; var ex = await Assert.ThrowsAsync(async () => { diff --git a/tests/StackExchange.Redis.Tests/TestBase.cs b/tests/StackExchange.Redis.Tests/TestBase.cs index 2928c894e..0750efa18 100644 --- a/tests/StackExchange.Redis.Tests/TestBase.cs +++ b/tests/StackExchange.Redis.Tests/TestBase.cs @@ -119,6 +119,7 @@ static TestBase() Console.WriteLine(" GC LOH Mode: " + GCSettings.LargeObjectHeapCompactionMode); Console.WriteLine(" GC Latency Mode: " + GCSettings.LatencyMode); } + internal static string Time() => DateTime.UtcNow.ToString("HH:mm:ss.ffff"); protected void OnConnectionFailed(object sender, ConnectionFailedEventArgs e) { @@ -223,13 +224,25 @@ protected IServer GetAnyMaster(IConnectionMultiplexer muxer) } internal virtual IInternalConnectionMultiplexer Create( - string clientName = null, int? syncTimeout = null, bool? allowAdmin = null, int? keepAlive = null, - int? connectTimeout = null, string password = null, string tieBreaker = null, TextWriter log = null, - bool fail = true, string[] disabledCommands = null, string[] enabledCommands = null, - bool checkConnect = true, string failMessage = null, - string channelPrefix = null, Proxy? 
proxy = null, - string configuration = null, bool logTransactionData = true, - bool shared = true, int? defaultDatabase = null, + string clientName = null, + int? syncTimeout = null, + bool? allowAdmin = null, + int? keepAlive = null, + int? connectTimeout = null, + string password = null, + string tieBreaker = null, + TextWriter log = null, + bool fail = true, + string[] disabledCommands = null, + string[] enabledCommands = null, + bool checkConnect = true, + string failMessage = null, + string channelPrefix = null, + Proxy? proxy = null, + string configuration = null, + bool logTransactionData = true, + bool shared = true, + int? defaultDatabase = null, BacklogPolicy backlogPolicy = null, [CallerMemberName] string caller = null) { @@ -238,8 +251,21 @@ internal virtual IInternalConnectionMultiplexer Create( Assert.True(false, "Failure: Be sure to call the TestBase constuctor like this: BasicOpsTests(ITestOutputHelper output) : base(output) { }"); } - if (shared && _fixture != null && _fixture.IsEnabled && enabledCommands == null && disabledCommands == null && fail && channelPrefix == null && proxy == null - && configuration == null && password == null && tieBreaker == null && defaultDatabase == null && (allowAdmin == null || allowAdmin == true) && expectedFailCount == 0 && backlogPolicy == null) + // Share a connection if instructed to and we can - many specifics mean no sharing + if (shared + && _fixture != null && _fixture.IsEnabled + && enabledCommands == null + && disabledCommands == null + && fail + && channelPrefix == null + && proxy == null + && configuration == null + && password == null + && tieBreaker == null + && defaultDatabase == null + && (allowAdmin == null || allowAdmin == true) + && expectedFailCount == 0 + && backlogPolicy == null) { configuration = GetConfiguration(); if (configuration == _fixture.Configuration) @@ -266,18 +292,29 @@ internal virtual IInternalConnectionMultiplexer Create( public static ConnectionMultiplexer CreateDefault( 
TextWriter output, - string clientName = null, int? syncTimeout = null, bool? allowAdmin = null, int? keepAlive = null, - int? connectTimeout = null, string password = null, string tieBreaker = null, TextWriter log = null, - bool fail = true, string[] disabledCommands = null, string[] enabledCommands = null, - bool checkConnect = true, string failMessage = null, - string channelPrefix = null, Proxy? proxy = null, - string configuration = null, bool logTransactionData = true, + string clientName = null, + int? syncTimeout = null, + bool? allowAdmin = null, + int? keepAlive = null, + int? connectTimeout = null, + string password = null, + string tieBreaker = null, + TextWriter log = null, + bool fail = true, + string[] disabledCommands = null, + string[] enabledCommands = null, + bool checkConnect = true, + string failMessage = null, + string channelPrefix = null, + Proxy? proxy = null, + string configuration = null, + bool logTransactionData = true, int? defaultDatabase = null, BacklogPolicy backlogPolicy = null, [CallerMemberName] string caller = null) { StringWriter localLog = null; - if(log == null) + if (log == null) { log = localLog = new StringWriter(); } From 6c8edf6785392960a42c86dd6267ad9715455cd2 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Wed, 27 Oct 2021 12:32:23 -0400 Subject: [PATCH 011/117] Revert ServerEndPoint (bad commit split) --- src/StackExchange.Redis/ServerEndPoint.cs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index 689985cd9..28f5850e8 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -519,9 +519,6 @@ internal Message GetTracerMessage(bool assertIdentity) internal bool IsSelectable(RedisCommand command, bool allowDisconnected = false) { var bridge = unselectableReasons == 0 ? 
GetBridge(command, false) : null; - var bridge = unselectableReasons == 0 || (allowDisconnected && unselectableReasons == UnselectableFlags.DidNotRespond) - ? GetBridge(command, false) - : null; return bridge != null && (allowDisconnected || bridge.IsConnected); } From 076bc0099a636ec966f7c7d5f6177ef558114aad Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 2 Nov 2021 12:57:41 -0400 Subject: [PATCH 012/117] Backlog add hook points past .IsConnected and initial tests This gets us going to queue commands while disconnected. We still need handoff to sibling connections but this gets us going. --- src/StackExchange.Redis/PhysicalBridge.cs | 52 +++++- src/StackExchange.Redis/PhysicalConnection.cs | 1 + src/StackExchange.Redis/RedisServer.cs | 4 +- src/StackExchange.Redis/ServerEndPoint.cs | 9 +- .../StackExchange.Redis.Tests/BacklogTests.cs | 153 +++++++++++++++--- .../ConnectionFailedErrors.cs | 1 + 6 files changed, 189 insertions(+), 31 deletions(-) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 7f17795e2..8da039505 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -152,6 +152,13 @@ private WriteResult QueueOrFailMessage(Message message) _backlogGeneral.Enqueue(message); return WriteResult.Success; // we'll take it... } + else if (Multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) + { + message.SetEnqueued(null); + message.SetBacklogState(_backlogGeneral.Count, null); + _backlogGeneral.Enqueue(message); + return WriteResult.Success; // we'll queue for retry here... 
+ } else { // sorry, we're just not ready for you yet; @@ -178,6 +185,19 @@ public WriteResult TryWriteSync(Message message, bool isReplica) var physical = this.physical; if (physical == null) return FailDueToNoConnection(message); + if (physical == null) + { + // If we're not connected yet and supposed to, queue it up + if (Multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) + { + if (TryPushToBacklog(message, onlyIfExists: false)) + { + message.SetEnqueued(null); + return WriteResult.Success; + } + } + return FailDueToNoConnection(message); + } var result = WriteMessageTakingWriteLockSync(physical, message); LogNonPreferred(message.Flags, isReplica); return result; @@ -189,7 +209,19 @@ public ValueTask TryWriteAsync(Message message, bool isReplica, boo if (!IsConnected) return new ValueTask(QueueOrFailMessage(message)); var physical = this.physical; - if (physical == null) return new ValueTask(FailDueToNoConnection(message)); + if (physical == null) + { + // If we're not connected yet and supposed to, queue it up + if (!isHandshake && Multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) + { + if (TryPushToBacklog(message, onlyIfExists: false)) + { + message.SetEnqueued(null); + return new ValueTask(WriteResult.Success); + } + } + return new ValueTask(FailDueToNoConnection(message)); + } var result = WriteMessageTakingWriteLockAsync(physical, message, isHandshake); LogNonPreferred(message.Flags, isReplica); @@ -540,7 +572,15 @@ internal void OnHeartbeat(bool ifConnectedOnly) bool runThisTime = false; try { - CheckBacklogsForTimeouts(); + if (BacklogHasItems) + { + CheckBacklogsForTimeouts(); + // Ensure we're processing the backlog + if (BacklogHasItems) + { + StartBacklogProcessor(); + } + } runThisTime = !isDisposed && Interlocked.CompareExchange(ref beating, 1, 0) == 0; if (!runThisTime) return; @@ -970,6 +1010,8 @@ private async Task ProcessBacklogsAsync() private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog, Backlog 
handlingBacklog) { + // Importantly: don't assume we have a physical connection here + // We are very likely to hit a state where it's not re-established or even referenced here LockToken token = default; try { @@ -1031,7 +1073,7 @@ private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog, B #endif message.SetExceptionAndComplete(ex, this); } - else + else if (physical?.HasOuputPipe == true) { _backlogStatus = BacklogStatus.WritingMessage; var result = WriteMessageInsideLock(physical, message); @@ -1062,7 +1104,7 @@ private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog, B } } _backlogStatus = BacklogStatus.SettingIdle; - physical.SetIdle(); + physical?.SetIdle(); _backlogStatus = BacklogStatus.Inactive; } finally @@ -1105,7 +1147,7 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect // AVOID REORDERING MESSAGES // Prefer to add it to the backlog if this thread can see that there might already be a message backlog. // We do this before attempting to take the writelock, because we won't actually write, we'll just let the backlog get processed in due course - if (TryPushToBacklog(message, onlyIfExists: true, isHandshake: isHandshake)) + if (TryPushToBacklog(message, onlyIfExists: physical.HasOuputPipe, isHandshake: isHandshake)) { return new ValueTask(WriteResult.Success); // queued counts as success } diff --git a/src/StackExchange.Redis/PhysicalConnection.cs b/src/StackExchange.Redis/PhysicalConnection.cs index 7b0096edc..06086bbce 100644 --- a/src/StackExchange.Redis/PhysicalConnection.cs +++ b/src/StackExchange.Redis/PhysicalConnection.cs @@ -68,6 +68,7 @@ internal void GetBytes(out long sent, out long received) } private IDuplexPipe _ioPipe; + internal bool HasOuputPipe => _ioPipe?.Output != null; private Socket _socket; private Socket VolatileSocket => Volatile.Read(ref _socket); diff --git a/src/StackExchange.Redis/RedisServer.cs b/src/StackExchange.Redis/RedisServer.cs index 8913aef74..67c592424 100644 --- 
a/src/StackExchange.Redis/RedisServer.cs +++ b/src/StackExchange.Redis/RedisServer.cs @@ -575,7 +575,7 @@ internal override Task ExecuteAsync(Message message, ResultProcessor pr { // inject our expected server automatically if (server == null) server = this.server; FixFlags(message, server); - if (!server.IsConnected) + if (!server.IsConnected && !multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) { if (message == null) return CompletedTask.Default(asyncState); if (message.IsFireAndForget) return CompletedTask.Default(null); // F+F explicitly does not get async-state @@ -592,7 +592,7 @@ internal override T ExecuteSync(Message message, ResultProcessor processor { // inject our expected server automatically if (server == null) server = this.server; FixFlags(message, server); - if (!server.IsConnected) + if (!server.IsConnected && !multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) { if (message == null || message.IsFireAndForget) return default(T); throw ExceptionFactory.NoConnectionAvailable(multiplexer, message, server); diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index 28f5850e8..fd2c2d3b6 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -518,7 +518,14 @@ internal Message GetTracerMessage(bool assertIdentity) internal bool IsSelectable(RedisCommand command, bool allowDisconnected = false) { - var bridge = unselectableReasons == 0 ? GetBridge(command, false) : null; + //var bridge = unselectableReasons == 0 ? GetBridge(command, false) : null; + + // TODO: Possible v2 (need to observe flags) + // Until we've connected at least once, we're going too have a DidNotRespond unselectable reason present + var bridge = unselectableReasons == 0 || (allowDisconnected && unselectableReasons == UnselectableFlags.DidNotRespond) + ? 
GetBridge(command, false) + : null; + return bridge != null && (allowDisconnected || bridge.IsConnected); } diff --git a/tests/StackExchange.Redis.Tests/BacklogTests.cs b/tests/StackExchange.Redis.Tests/BacklogTests.cs index 3c8268e44..f3f37c15c 100644 --- a/tests/StackExchange.Redis.Tests/BacklogTests.cs +++ b/tests/StackExchange.Redis.Tests/BacklogTests.cs @@ -1,5 +1,4 @@ using System; -using System.Threading; using System.Threading.Tasks; using Xunit; using Xunit.Abstractions; @@ -12,39 +11,147 @@ public BacklogTests(ITestOutputHelper output) : base (output) { } protected override string GetConfiguration() => TestConfig.Current.MasterServerAndPort + "," + TestConfig.Current.ReplicaServerAndPort; + // TODO: Sync route testing (e.g. Ping() for TryWriteSync path) + // TODO: Specific server calls + + [Fact] + public async Task FailFast() + { + try + { + // Ensuring the FailFast policy errors immediate with no connection available exceptions + var options = new ConfigurationOptions() + { + BacklogPolicy = BacklogPolicy.FailFast, + AbortOnConnectFail = false, + ConnectTimeout = 1000, + ConnectRetry = 2, + SyncTimeout = 10000, + KeepAlive = 10000, + AsyncTimeout = 5000, + AllowAdmin = true, + }; + options.EndPoints.Add(TestConfig.Current.MasterServerAndPort); + + using var muxer = await ConnectionMultiplexer.ConnectAsync(options, Writer); + + var db = muxer.GetDatabase(); + Writer.WriteLine("Test: Initial (connected) ping"); + await db.PingAsync(); + + var server = muxer.GetServerSnapshot()[0]; + var stats = server.GetBridgeStatus(RedisCommand.PING); + Assert.Equal(0, stats.BacklogMessagesPending); // Everything's normal + + // Fail the connection + Writer.WriteLine("Test: Simulating failure"); + muxer.AllowConnect = false; + server.SimulateConnectionFailure(SimulatedFailureType.All); + Assert.False(muxer.IsConnected); + + // Queue up some commands + Writer.WriteLine("Test: Disconnected pings"); + await Assert.ThrowsAsync(() => db.PingAsync()); + + var 
disconnectedStats = server.GetBridgeStatus(RedisCommand.PING); + Assert.False(muxer.IsConnected); + Assert.Equal(0, disconnectedStats.BacklogMessagesPending); + + Writer.WriteLine("Test: Allowing reconnect"); + muxer.AllowConnect = true; + Writer.WriteLine("Test: Awaiting reconnect"); + await UntilCondition(TimeSpan.FromSeconds(3), () => muxer.IsConnected).ForAwait(); + + Writer.WriteLine("Test: Reconnecting"); + Assert.True(muxer.IsConnected); + var reconnectedStats = server.GetBridgeStatus(RedisCommand.PING); + Assert.Equal(0, reconnectedStats.BacklogMessagesPending); + Assert.Equal(0, reconnectedStats.BacklogMessagesPendingGeneral); + Assert.Equal(0, reconnectedStats.BacklogMessagesPendingSpecificServer); + + _ = db.PingAsync(); + _ = db.PingAsync(); + var lastPing = db.PingAsync(); + + // We should see none queued + Assert.Equal(0, stats.BacklogMessagesPending); + await lastPing; + } + finally + { + ClearAmbientFailures(); + } + } + + [Fact] - public async Task BasicTest() + public async Task QueuesAndFlushesAfterReconnecting() { try { - using (var muxer = Create(keepAlive: 1, connectTimeout: 10000, allowAdmin: true, shared: false)) + var options = new ConfigurationOptions() { - var conn = muxer.GetDatabase(); - conn.Ping(); + BacklogPolicy = BacklogPolicy.Default, + AbortOnConnectFail = false, + ConnectTimeout = 1000, + ConnectRetry = 2, + SyncTimeout = 10000, + KeepAlive = 10000, + AsyncTimeout = 5000, + AllowAdmin = true, + }; + options.EndPoints.Add(TestConfig.Current.MasterServerAndPort); + + using var muxer = await ConnectionMultiplexer.ConnectAsync(options, Writer); + + var db = muxer.GetDatabase(); + Writer.WriteLine("Test: Initial (connected) ping"); + await db.PingAsync(); + + var server = muxer.GetServerSnapshot()[0]; + var stats = server.GetBridgeStatus(RedisCommand.PING); + Assert.Equal(0, stats.BacklogMessagesPending); // Everything's normal + + // Fail the connection + Writer.WriteLine("Test: Simulating failure"); + muxer.AllowConnect = false; + 
server.SimulateConnectionFailure(SimulatedFailureType.All); + Assert.False(muxer.IsConnected); + + // Queue up some commands + Writer.WriteLine("Test: Disconnected pings"); + _ = db.PingAsync(); + _ = db.PingAsync(); + var lastPing = db.PingAsync(); + + // TODO: Add specific server call - var server = muxer.GetServer(muxer.GetEndPoints()[0]); - var server2 = muxer.GetServer(muxer.GetEndPoints()[1]); + var disconnectedStats = server.GetBridgeStatus(RedisCommand.PING); + Assert.False(muxer.IsConnected); + Assert.True(disconnectedStats.BacklogMessagesPending >= 3, $"Expected {nameof(disconnectedStats.BacklogMessagesPending)} > 3, got {disconnectedStats.BacklogMessagesPending}"); - muxer.AllowConnect = false; + Writer.WriteLine("Test: Allowing reconnect"); + muxer.AllowConnect = true; + Writer.WriteLine("Test: Awaiting reconnect"); + await UntilCondition(TimeSpan.FromSeconds(3), () => muxer.IsConnected).ForAwait(); - // muxer.IsConnected is true of *any* are connected, simulate failure for all cases. 
- server.SimulateConnectionFailure(SimulatedFailureType.All); - Assert.False(server.IsConnected); - Assert.True(server2.IsConnected); - Assert.True(muxer.IsConnected); + Writer.WriteLine("Test: Awaiting ping1"); + await lastPing; - server2.SimulateConnectionFailure(SimulatedFailureType.All); - Assert.False(server.IsConnected); - Assert.False(server2.IsConnected); - Assert.False(muxer.IsConnected); + Writer.WriteLine("Test: Reconnecting"); + Assert.True(muxer.IsConnected); + var reconnectedStats = server.GetBridgeStatus(RedisCommand.PING); + Assert.Equal(0, reconnectedStats.BacklogMessagesPending); + Assert.Equal(0, reconnectedStats.BacklogMessagesPendingGeneral); + Assert.Equal(0, reconnectedStats.BacklogMessagesPendingSpecificServer); - // should reconnect within 1 keepalive interval - muxer.AllowConnect = true; - Log("Waiting for reconnect"); - await UntilCondition(TimeSpan.FromSeconds(2), () => muxer.IsConnected).ForAwait(); + _ = db.PingAsync(); + _ = db.PingAsync(); + lastPing = db.PingAsync(); - Assert.True(muxer.IsConnected); - } + // We should see none queued + Assert.Equal(0, stats.BacklogMessagesPending); + await lastPing; } finally { diff --git a/tests/StackExchange.Redis.Tests/ConnectionFailedErrors.cs b/tests/StackExchange.Redis.Tests/ConnectionFailedErrors.cs index cd3521788..94623e4fd 100644 --- a/tests/StackExchange.Redis.Tests/ConnectionFailedErrors.cs +++ b/tests/StackExchange.Redis.Tests/ConnectionFailedErrors.cs @@ -105,6 +105,7 @@ void innerScenario() options.Password = ""; options.AbortOnConnectFail = false; options.ConnectTimeout = 1000; + options.BacklogPolicy = BacklogPolicy.FailFast; var outer = Assert.Throws(() => { using (var muxer = ConnectionMultiplexer.Connect(options)) From 9f86830ffd14efbb6389f36510860438159500f6 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Fri, 12 Nov 2021 09:30:56 -0500 Subject: [PATCH 013/117] Remove server-specific for now --- src/StackExchange.Redis/ExceptionFactory.cs | 1 - 
src/StackExchange.Redis/PhysicalBridge.cs | 166 +++++++----------- .../StackExchange.Redis.Tests/BacklogTests.cs | 4 - 3 files changed, 62 insertions(+), 109 deletions(-) diff --git a/src/StackExchange.Redis/ExceptionFactory.cs b/src/StackExchange.Redis/ExceptionFactory.cs index 53ece65eb..4cc274d24 100644 --- a/src/StackExchange.Redis/ExceptionFactory.cs +++ b/src/StackExchange.Redis/ExceptionFactory.cs @@ -328,7 +328,6 @@ ServerEndPoint server if (bs.BacklogMessagesPending != 0) { Add(data, sb, "Backlog-Writer", "bw", bs.BacklogStatus.ToString()); - Add(data, sb, "Backlog-Name", "abl", bs.ActiveBacklog.ToString()); } if (bs.Connection.ReadStatus != PhysicalConnection.ReadStatus.NA) Add(data, sb, "Read-State", "rs", bs.Connection.ReadStatus.ToString()); if (bs.Connection.WriteStatus != PhysicalConnection.WriteStatus.NA) Add(data, sb, "Write-State", "ws", bs.Connection.WriteStatus.ToString()); diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 8da039505..6936b5672 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -26,20 +26,17 @@ internal sealed class PhysicalBridge : IDisposable private readonly long[] profileLog = new long[ProfileLogSamples]; - // We have 3 queues in play on this bridge, and things enter them in this order: - // General: for anything coming into the bridge. Everything but handshake commands goes into this queue. - // SpecificServer: for anything targeting this endpoint that cannot be handed off to another endpoint. - // The queue priority order is reverse: - // 1. Handshake messages are sent directly to the pipe itself to re-establish the connection (e.g. AUTH) - // 2. Specific server messages are sent next (e.g. REPLICAOF - this queue is rare to start with) - // 3. 
All other messages are handled - // Note: this doesn't mean sent - if we have another viable endpoint and these messages can be sent on it, - // then we'll either send them to it. Any messages specifying this endpoint explicitly will go into the - // specific server queue so that we unblock the general FIFO queue for other handoffs. - // - private readonly ConcurrentQueue _backlogGeneral = new(), - _backlogSpecificServer = new(); - private bool BacklogHasItems => !_backlogGeneral.IsEmpty || !_backlogSpecificServer.IsEmpty; + /// + /// We have 1 queue in play on this bridge. + /// We're bypassing the queue for handshake events that go straight to the socket. + /// Everything else that's not an internal call goes into the queue if there is a queue. + /// + /// In a later release we want to remove per-server events from this queue compeltely and shunt queued messages + /// to another capable primary connection if oone if avaialble to process them faster (order is already hosed). + /// For now, simplicity in: queue it all, replay or timeout it all. + /// + private readonly ConcurrentQueue _backlog = new(); + private bool BacklogHasItems => !_backlog.IsEmpty; private int _backlogProcessorIsRunning = 0; private int activeWriters = 0; @@ -148,15 +145,15 @@ private WriteResult QueueOrFailMessage(Message message) // you can go in the queue, but we won't be starting // a worker, because the handshake has not completed message.SetEnqueued(null); - message.SetBacklogState(_backlogGeneral.Count, null); - _backlogGeneral.Enqueue(message); + message.SetBacklogState(_backlog.Count, null); + _backlog.Enqueue(message); return WriteResult.Success; // we'll take it... 
} else if (Multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) { message.SetEnqueued(null); - message.SetBacklogState(_backlogGeneral.Count, null); - _backlogGeneral.Enqueue(message); + message.SetBacklogState(_backlog.Count, null); + _backlog.Enqueue(message); return WriteResult.Success; // we'll queue for retry here... } else @@ -338,27 +335,14 @@ internal readonly struct BridgeStatus public bool IsWriterActive { get; init; } /// - /// Total number of backlog messages that are in the retry backlog. - /// - public int BacklogMessagesPending => BacklogMessagesPendingGeneral + BacklogMessagesPendingSpecificServer; - - /// - /// The number of backlog messages that are in the retry queue. - /// - public int BacklogMessagesPendingGeneral { get; init; } - /// - /// The number of backlog messages that are in the retry queue. + /// The number of messages that are in the backlog queue (waiting to be sent when the connection is healthy again). /// - public int BacklogMessagesPendingSpecificServer { get; init; } + public int BacklogMessagesPending { get; init; } /// /// Status of the currently processing backlog, if any. /// public BacklogStatus BacklogStatus { get; init; } - /// - /// Name of the currently processing backlog, if any. - /// - public Backlog ActiveBacklog { get; init; } /// /// Status for the underlying . @@ -375,10 +359,8 @@ internal readonly struct BridgeStatus { MessagesSinceLastHeartbeat = (int)(Interlocked.Read(ref operationCount) - Interlocked.Read(ref profileLastLog)), IsWriterActive = !_singleWriterMutex.IsAvailable, - BacklogMessagesPendingGeneral = _backlogGeneral.Count, - BacklogMessagesPendingSpecificServer = _backlogSpecificServer.Count, + BacklogMessagesPending = _backlog.Count, BacklogStatus = _backlogStatus, - ActiveBacklog = _activeBacklog, Connection = physical?.GetStatus() ?? 
PhysicalConnection.ConnectionStatus.Default, }; @@ -529,13 +511,7 @@ internal void OnDisconnected(ConnectionFailureType failureType, PhysicalConnecti private void AbandonPendingBacklog(Exception ex) { - // Drain both lower queues, but not handshake since that's likely to cause overlapping failure shenanigans. - while (_backlogSpecificServer.TryDequeue(out Message next)) - { - Multiplexer?.OnMessageFaulted(next, ex); - next.SetExceptionAndComplete(ex, this); - } - while (_backlogGeneral.TryDequeue(out Message next)) + while (_backlog.TryDequeue(out Message next)) { Multiplexer?.OnMessageFaulted(next, ex); next.SetExceptionAndComplete(ex, this); @@ -839,16 +815,14 @@ internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool TryPushToBacklog(Message message, bool onlyIfExists, bool isHandshake = false) { - // If we're unhealthy in middle of a handshake, queue behind so that we AUTH and such in order - // For anything not handshake, we handle as before going into the general queue - // Internal calls also aren't queued here because ordering does no matter for them - // Get them on the wire ASAP or fail internally (which doesn't alert the user) to complete ASAP + // In the handshake case: send the command directly through. + // If we're disconnected *in the middle of a handshake*, we've bombed a brand new socket and failing, + // backing off, and retrying next heartbeat is best anyway. + // + // Internal calls also shouldn't queue - try immediately. If these aren't errors (most aren't), we + // won't alert the user. if (isHandshake || message.IsInternalCall) { - // TODO: Discussion. - // For handshake commands we did have a backlog in mind here, but if they bypass directly to the pipe - // It's effectively a queue already, or our handshake failed and we're restarting anyhow - // So perhaps, we simply do not need this concurrent queue at all. 
return false; } @@ -856,12 +830,11 @@ private bool TryPushToBacklog(Message message, bool onlyIfExists, bool isHandsha // we only need care if WE are able to // see the queue when its empty. Not whether anyone else sees it as empty. // So strong synchronization is not required. - if (_backlogGeneral.IsEmpty & onlyIfExists) return false; - + if (_backlog.IsEmpty & onlyIfExists) return false; - int count = _backlogGeneral.Count; + int count = _backlog.Count; message.SetBacklogState(count, physical); - _backlogGeneral.Enqueue(message); + _backlog.Enqueue(message); // The correct way to decide to start backlog process is not based on previously empty // but based on a) not empty now (we enqueued!) and b) no backlog processor already running. @@ -898,44 +871,42 @@ private void StartBacklogProcessor() private volatile int _backlogProcessorRequestedTime; #endif - private void CheckBacklogsForTimeouts() // check the head of the backlog queue, consuming anything that looks dead + /// + /// Crawls from the head of the backlog queue, consuming anything that should have timed out + /// and pruning it accoordingly (these messages will get timeout exceptions). + /// + private void CheckBacklogsForTimeouts() { - // Check the head of the backlog queue, consuming anything that looks dead - void crawlQueue(ConcurrentQueue backlog, int timeout) - { - var now = Environment.TickCount; + var now = Environment.TickCount; + var timeout = TimeoutMilliseconds; - // Because peeking at the backlog, checking message and then dequeueing, is not thread-safe, we do have to use - // a lock here, for mutual exclusion of backlog DEQUEUERS. Unfortunately. - // But we reduce contention by only locking if we see something that looks timed out. - while (backlog.TryPeek(out Message message)) + // Because peeking at the backlog, checking message and then dequeueing, is not thread-safe, we do have to use + // a lock here, for mutual exclusion of backlog DEQUEUERS. Unfortunately. 
+ // But we reduce contention by only locking if we see something that looks timed out. + while (_backlog.TryPeek(out Message message)) + { + // don't stomp these (not that they should have the async timeout flag, but...) + if (message.IsInternalCall) break; + if (!message.HasAsyncTimedOut(now, timeout, out var _)) break; // not a timeout - we can stop looking + lock (_backlog) { - // don't stomp these (not that they should have the async timeout flag, but...) + // peek again since we didn't have lock before... + // and rerun the exact same checks as above, note that it may be a different message now + if (!_backlog.TryPeek(out message)) break; if (message.IsInternalCall) break; - if (!message.HasAsyncTimedOut(now, timeout, out var _)) break; // not a timeout - we can stop looking - lock (backlog) - { - // peek again since we didn't have lock before... - // and rerun the exact same checks as above, note that it may be a different message now - if (!backlog.TryPeek(out message)) break; - if (message.IsInternalCall) break; - if (!message.HasAsyncTimedOut(now, timeout, out var _)) break; + if (!message.HasAsyncTimedOut(now, timeout, out var _)) break; - if (!backlog.TryDequeue(out var message2) || (message != message2)) // consume it for real - { - throw new RedisException("Thread safety bug detected! A queue message disappeared while we had the backlog lock"); - } + if (!_backlog.TryDequeue(out var message2) || (message != message2)) // consume it for real + { + throw new RedisException("Thread safety bug detected! A queue message disappeared while we had the backlog lock"); } - - // Tell the message it has failed - // Note: Attempting to *avoid* reentrancy/deadlock issues by not holding the lock while completing messages. 
- var ex = Multiplexer.GetException(WriteResult.TimeoutBeforeWrite, message, ServerEndPoint); - message.SetExceptionAndComplete(ex, this); } - } - crawlQueue(_backlogSpecificServer, TimeoutMilliseconds); - crawlQueue(_backlogGeneral, TimeoutMilliseconds); + // Tell the message it has failed + // Note: Attempting to *avoid* reentrancy/deadlock issues by not holding the lock while completing messages. + var ex = Multiplexer.GetException(WriteResult.TimeoutBeforeWrite, message, ServerEndPoint); + message.SetExceptionAndComplete(ex, this); + } } internal enum BacklogStatus : byte @@ -956,28 +927,18 @@ internal enum BacklogStatus : byte Faulted, } - internal enum Backlog : byte - { - None, - General, - SpecificServer, - } - - private volatile Backlog _activeBacklog; private volatile BacklogStatus _backlogStatus; private async Task ProcessBacklogsAsync() { _backlogStatus = BacklogStatus.Starting; try { - if (!_backlogSpecificServer.IsEmpty) - { - await ProcessBridgeBacklogAsync(_backlogSpecificServer, Backlog.SpecificServer); - } - if (!_backlogGeneral.IsEmpty) + if (!_backlog.IsEmpty) { - await ProcessBridgeBacklogAsync(_backlogGeneral, Backlog.General); // Needs handoff - // only handoff to another completely viable connection + // TODO: vNext handoff this backlog to another primary ("can handle everything") connection + // and remove any per-server commands. 
This means we need to track a bit of whether something + // was server-endpoint-specific in PrepareToPushMessageToBridge (was the server ref null or not) + await ProcessBridgeBacklogAsync(_backlog); // Needs handoff } } catch @@ -986,8 +947,6 @@ private async Task ProcessBacklogsAsync() } finally { - _activeBacklog = Backlog.None; - // Do this in finally block, so that thread aborts can't convince us the backlog processor is running forever if (Interlocked.CompareExchange(ref _backlogProcessorIsRunning, 0, 1) != 1) { @@ -1008,7 +967,7 @@ private async Task ProcessBacklogsAsync() } } - private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog, Backlog handlingBacklog) + private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog) { // Importantly: don't assume we have a physical connection here // We are very likely to hit a state where it's not re-established or even referenced here @@ -1020,7 +979,6 @@ private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog, B var msToStartWorker = unchecked(tryToAcquireTime - _backlogProcessorRequestedTime); int failureCount = 0; #endif - _activeBacklog = handlingBacklog; _backlogStatus = BacklogStatus.Starting; while (true) diff --git a/tests/StackExchange.Redis.Tests/BacklogTests.cs b/tests/StackExchange.Redis.Tests/BacklogTests.cs index f3f37c15c..5482866a7 100644 --- a/tests/StackExchange.Redis.Tests/BacklogTests.cs +++ b/tests/StackExchange.Redis.Tests/BacklogTests.cs @@ -66,8 +66,6 @@ public async Task FailFast() Assert.True(muxer.IsConnected); var reconnectedStats = server.GetBridgeStatus(RedisCommand.PING); Assert.Equal(0, reconnectedStats.BacklogMessagesPending); - Assert.Equal(0, reconnectedStats.BacklogMessagesPendingGeneral); - Assert.Equal(0, reconnectedStats.BacklogMessagesPendingSpecificServer); _ = db.PingAsync(); _ = db.PingAsync(); @@ -142,8 +140,6 @@ public async Task QueuesAndFlushesAfterReconnecting() Assert.True(muxer.IsConnected); var reconnectedStats = 
server.GetBridgeStatus(RedisCommand.PING); Assert.Equal(0, reconnectedStats.BacklogMessagesPending); - Assert.Equal(0, reconnectedStats.BacklogMessagesPendingGeneral); - Assert.Equal(0, reconnectedStats.BacklogMessagesPendingSpecificServer); _ = db.PingAsync(); _ = db.PingAsync(); From f7b09752a118684ccda732fce801a9649b86b043 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Fri, 12 Nov 2021 09:35:38 -0500 Subject: [PATCH 014/117] Cleanup --- src/StackExchange.Redis/PhysicalBridge.cs | 1 - src/StackExchange.Redis/ServerEndPoint.cs | 3 --- 2 files changed, 4 deletions(-) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 6936b5672..9274be5f8 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -338,7 +338,6 @@ internal readonly struct BridgeStatus /// The number of messages that are in the backlog queue (waiting to be sent when the connection is healthy again). /// public int BacklogMessagesPending { get; init; } - /// /// Status of the currently processing backlog, if any. /// diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index fd2c2d3b6..c5f5bbd5f 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -518,9 +518,6 @@ internal Message GetTracerMessage(bool assertIdentity) internal bool IsSelectable(RedisCommand command, bool allowDisconnected = false) { - //var bridge = unselectableReasons == 0 ? GetBridge(command, false) : null; - - // TODO: Possible v2 (need to observe flags) // Until we've connected at least once, we're going too have a DidNotRespond unselectable reason present var bridge = unselectableReasons == 0 || (allowDisconnected && unselectableReasons == UnselectableFlags.DidNotRespond) ? 
GetBridge(command, false) From eb9c0c3173dd3414633e63573aed3ce0f49caca3 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 16 Nov 2021 08:57:09 -0500 Subject: [PATCH 015/117] Update ServerEndPoint.cs --- src/StackExchange.Redis/ServerEndPoint.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index c5f5bbd5f..c30384bd9 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -773,7 +773,7 @@ internal string Summary() } /// - /// Write the message directly or queues in the handshake (priority) queue. + /// Write the message directly to the pipe or fail...will not queue. /// internal ValueTask WriteDirectOrQueueFireAndForgetAsync(PhysicalConnection connection, Message message, ResultProcessor processor) { From 7e176898db5492182bdc9c5decbf538660ee5d69 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 16 Nov 2021 10:30:51 -0500 Subject: [PATCH 016/117] ServerEndPoint: clear flags much faster We observe a race here between the connection state saying we're connected and the connection actually being selectable, this tightens that window tremendously. 
--- src/StackExchange.Redis/ServerEndPoint.cs | 3 +++ .../StackExchange.Redis.Tests/BacklogTests.cs | 27 ++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index c30384bd9..959e576cd 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -582,6 +582,9 @@ internal void OnFullyEstablished(PhysicalConnection connection, string source) var bridge = connection?.BridgeCouldBeNull; if (bridge != null) { + // Clear the unselectable flag ASAP since we are open for business + ClearUnselectable(UnselectableFlags.DidNotRespond); + if (bridge == subscription) { Multiplexer.ResendSubscriptions(this); diff --git a/tests/StackExchange.Redis.Tests/BacklogTests.cs b/tests/StackExchange.Redis.Tests/BacklogTests.cs index 5482866a7..1ccd50fa7 100644 --- a/tests/StackExchange.Redis.Tests/BacklogTests.cs +++ b/tests/StackExchange.Redis.Tests/BacklogTests.cs @@ -17,6 +17,25 @@ public BacklogTests(ITestOutputHelper output) : base (output) { } [Fact] public async Task FailFast() { + void PrintSnapshot(ConnectionMultiplexer muxer) + { + Writer.WriteLine("Snapshot summary:"); + foreach (var server in muxer.GetServerSnapshot()) + { + Writer.WriteLine($" {server.EndPoint}: "); + Writer.WriteLine($" Type: {server.ServerType}"); + Writer.WriteLine($" IsConnected: {server.IsConnected}"); + Writer.WriteLine($" IsConnecting: {server.IsConnecting}"); + Writer.WriteLine($" IsSelectable(allowDisconnected: true): {server.IsSelectable(RedisCommand.PING, true)}"); + Writer.WriteLine($" IsSelectable(allowDisconnected: false): {server.IsSelectable(RedisCommand.PING, false)}"); + Writer.WriteLine($" UnselectableFlags: {server.GetUnselectableFlags()}"); + var bridge = server.GetBridge(RedisCommand.PING, create: false); + Writer.WriteLine($" GetBridge: {bridge}"); + Writer.WriteLine($" IsConnected: {bridge.IsConnected}"); + Writer.WriteLine($" 
ConnectionState: {bridge.ConnectionState}"); + } + } + try { // Ensuring the FailFast policy errors immediate with no connection available exceptions @@ -64,6 +83,7 @@ public async Task FailFast() Writer.WriteLine("Test: Reconnecting"); Assert.True(muxer.IsConnected); + Assert.True(server.IsConnected); var reconnectedStats = server.GetBridgeStatus(RedisCommand.PING); Assert.Equal(0, reconnectedStats.BacklogMessagesPending); @@ -71,6 +91,11 @@ public async Task FailFast() _ = db.PingAsync(); var lastPing = db.PingAsync(); + // For debug, print out the snapshot and server states + PrintSnapshot(muxer); + + Assert.NotNull(muxer.SelectServer(Message.Create(-1, CommandFlags.None, RedisCommand.PING))); + // We should see none queued Assert.Equal(0, stats.BacklogMessagesPending); await lastPing; @@ -136,7 +161,7 @@ public async Task QueuesAndFlushesAfterReconnecting() Writer.WriteLine("Test: Awaiting ping1"); await lastPing; - Writer.WriteLine("Test: Reconnecting"); + Writer.WriteLine("Test: Checking reconnected"); Assert.True(muxer.IsConnected); var reconnectedStats = server.GetBridgeStatus(RedisCommand.PING); Assert.Equal(0, reconnectedStats.BacklogMessagesPending); From dc32e0d774e498b01f8e59267b239945d2a4e439 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 16 Nov 2021 10:54:46 -0500 Subject: [PATCH 017/117] Tweak so sync messages get ejected from the queue --- src/StackExchange.Redis/PhysicalBridge.cs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 9274be5f8..258c171fb 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -884,16 +884,15 @@ private void CheckBacklogsForTimeouts() // But we reduce contention by only locking if we see something that looks timed out. while (_backlog.TryPeek(out Message message)) { - // don't stomp these (not that they should have the async timeout flag, but...) 
- if (message.IsInternalCall) break; - if (!message.HasAsyncTimedOut(now, timeout, out var _)) break; // not a timeout - we can stop looking + // See if the message has pass our async timeout threshold + // or has otherwise been completed (e.g. a sync wait timed out) which would have cleared the ResultBox + if (message.HasAsyncTimedOut(now, timeout, out var _) || message.ResultBox == null) break; // not a timeout - we can stop looking lock (_backlog) { - // peek again since we didn't have lock before... + // Peek again since we didn't have lock before... // and rerun the exact same checks as above, note that it may be a different message now if (!_backlog.TryPeek(out message)) break; - if (message.IsInternalCall) break; - if (!message.HasAsyncTimedOut(now, timeout, out var _)) break; + if (!message.HasAsyncTimedOut(now, timeout, out var _) && message.ResultBox != null) break; if (!_backlog.TryDequeue(out var message2) || (message != message2)) // consume it for real { From 6f26239dc44836d18e4679d2a23ea55b51e693fb Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 16 Nov 2021 10:59:26 -0500 Subject: [PATCH 018/117] Add prerelease label --- version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.json b/version.json index 4d664c308..8191d532a 100644 --- a/version.json +++ b/version.json @@ -1,5 +1,5 @@ { - "version": "2.2", + "version": "2.5-prerelease", "versionHeightOffset": -1, "assemblyVersion": "2.0", "publicReleaseRefSpec": [ From 2d158065c8f2c3cb1dad98c37664603253d38529 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sun, 12 Dec 2021 10:40:15 -0500 Subject: [PATCH 019/117] Typo fix --- src/StackExchange.Redis/PhysicalBridge.cs | 4 ++-- src/StackExchange.Redis/PhysicalConnection.cs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 258c171fb..11abe3bca 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ 
b/src/StackExchange.Redis/PhysicalBridge.cs @@ -1029,7 +1029,7 @@ private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog) #endif message.SetExceptionAndComplete(ex, this); } - else if (physical?.HasOuputPipe == true) + else if (physical?.HasOutputPipe == true) { _backlogStatus = BacklogStatus.WritingMessage; var result = WriteMessageInsideLock(physical, message); @@ -1103,7 +1103,7 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect // AVOID REORDERING MESSAGES // Prefer to add it to the backlog if this thread can see that there might already be a message backlog. // We do this before attempting to take the writelock, because we won't actually write, we'll just let the backlog get processed in due course - if (TryPushToBacklog(message, onlyIfExists: physical.HasOuputPipe, isHandshake: isHandshake)) + if (TryPushToBacklog(message, onlyIfExists: physical.HasOutputPipe, isHandshake: isHandshake)) { return new ValueTask(WriteResult.Success); // queued counts as success } diff --git a/src/StackExchange.Redis/PhysicalConnection.cs b/src/StackExchange.Redis/PhysicalConnection.cs index 06086bbce..8588fcc54 100644 --- a/src/StackExchange.Redis/PhysicalConnection.cs +++ b/src/StackExchange.Redis/PhysicalConnection.cs @@ -68,7 +68,7 @@ internal void GetBytes(out long sent, out long received) } private IDuplexPipe _ioPipe; - internal bool HasOuputPipe => _ioPipe?.Output != null; + internal bool HasOutputPipe => _ioPipe?.Output != null; private Socket _socket; private Socket VolatileSocket => Volatile.Read(ref _socket); From bed33f807e7918429f13a6e932d37aa881bf905a Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sun, 12 Dec 2021 10:42:41 -0500 Subject: [PATCH 020/117] ProcessBacklogs comment --- src/StackExchange.Redis/PhysicalBridge.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 11abe3bca..3cec80391 100644 --- 
a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -926,6 +926,10 @@ internal enum BacklogStatus : byte } private volatile BacklogStatus _backlogStatus; + /// + /// Process the backlog(s) in play if any. + /// This means flushing commands to an available/active connection (if any) or spinning until timeout if not. + /// private async Task ProcessBacklogsAsync() { _backlogStatus = BacklogStatus.Starting; From 1a343c6bd8a643d190b314dae967780ac199d38c Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sun, 12 Dec 2021 10:47:03 -0500 Subject: [PATCH 021/117] Name/comment fixes --- src/StackExchange.Redis/PhysicalBridge.cs | 26 +++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 3cec80391..24e9fe0fe 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -31,8 +31,8 @@ internal sealed class PhysicalBridge : IDisposable /// We're bypassing the queue for handshake events that go straight to the socket. /// Everything else that's not an internal call goes into the queue if there is a queue. /// - /// In a later release we want to remove per-server events from this queue compeltely and shunt queued messages - /// to another capable primary connection if oone if avaialble to process them faster (order is already hosed). + /// In a later release we want to remove per-server events from this queue completely and shunt queued messages + /// to another capable primary connection if one is available to process them faster (order is already hosed). /// For now, simplicity in: queue it all, replay or timeout it all. 
/// private readonly ConcurrentQueue _backlog = new(); @@ -413,7 +413,7 @@ internal void KeepAlive() msg.SetInternalCall(); Multiplexer.Trace("Enqueue: " + msg); Multiplexer.OnInfoMessage($"heartbeat ({physical?.LastWriteSecondsAgo}s >= {ServerEndPoint?.WriteEverySeconds}s, {physical?.GetSentAwaitingResponseCount()} waiting) '{msg.CommandAndKey}' on '{PhysicalName}' (v{features.Version})"); - physical?.UpdateLastWriteTime(); // pre-emptively + physical?.UpdateLastWriteTime(); // preemptively #pragma warning disable CS0618 var result = TryWriteSync(msg, ServerEndPoint.IsReplica); #pragma warning restore CS0618 @@ -549,7 +549,7 @@ internal void OnHeartbeat(bool ifConnectedOnly) { if (BacklogHasItems) { - CheckBacklogsForTimeouts(); + CheckBacklogForTimeouts(); // Ensure we're processing the backlog if (BacklogHasItems) { @@ -769,7 +769,7 @@ internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical // AVOID REORDERING MESSAGES // Prefer to add it to the backlog if this thread can see that there might already be a message backlog. - // We do this before attempting to take the writelock, because we won't actually write, we'll just let the backlog get processed in due course + // We do this before attempting to take the write lock, because we won't actually write, we'll just let the backlog get processed in due course if (TryPushToBacklog(message, onlyIfExists: true)) { return WriteResult.Success; // queued counts as success @@ -825,7 +825,7 @@ private bool TryPushToBacklog(Message message, bool onlyIfExists, bool isHandsha return false; } - // Note, for deciding emptyness for whether to push onlyIfExists, and start worker, + // Note, for deciding emptiness for whether to push onlyIfExists, and start worker, // we only need care if WE are able to // see the queue when its empty. Not whether anyone else sees it as empty. // So strong synchronization is not required. 
@@ -852,7 +852,7 @@ private void StartBacklogProcessor() #endif _backlogStatus = BacklogStatus.Activating; - // Start the backlog processor; this is a bit unorthadox, as you would *expect* this to just + // Start the backlog processor; this is a bit unorthodox, as you would *expect* this to just // be Task.Run; that would work fine when healthy, but when we're falling on our face, it is // easy to get into a thread-pool-starvation "spiral of death" if we rely on the thread-pool // to unblock the thread-pool when there could be sync-over-async callers. Note that in reality, @@ -872,14 +872,14 @@ private void StartBacklogProcessor() /// /// Crawls from the head of the backlog queue, consuming anything that should have timed out - /// and pruning it accoordingly (these messages will get timeout exceptions). + /// and pruning it accordingly (these messages will get timeout exceptions). /// - private void CheckBacklogsForTimeouts() + private void CheckBacklogForTimeouts() { var now = Environment.TickCount; var timeout = TimeoutMilliseconds; - // Because peeking at the backlog, checking message and then dequeueing, is not thread-safe, we do have to use + // Because peeking at the backlog, checking message and then dequeuing, is not thread-safe, we do have to use // a lock here, for mutual exclusion of backlog DEQUEUERS. Unfortunately. // But we reduce contention by only locking if we see something that looks timed out. while (_backlog.TryPeek(out Message message)) @@ -1008,7 +1008,7 @@ private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog) while (true) { _backlogStatus = BacklogStatus.CheckingForWork; - // We need to lock _backlog when dequeueing because of + // We need to lock _backlog when dequeuing because of // races with timeout processing logic lock (backlog) { @@ -1084,7 +1084,7 @@ private WriteResult TimedOutBeforeWrite(Message message) /// /// This writes a message to the output stream /// - /// The phsyical connection to write to. 
+ /// The physical connection to write to. /// The message to be written. /// Whether this message is part of the handshake process. internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnection physical, Message message, bool isHandshake = false) @@ -1106,7 +1106,7 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect // AVOID REORDERING MESSAGES // Prefer to add it to the backlog if this thread can see that there might already be a message backlog. - // We do this before attempting to take the writelock, because we won't actually write, we'll just let the backlog get processed in due course + // We do this before attempting to take the write lock, because we won't actually write, we'll just let the backlog get processed in due course if (TryPushToBacklog(message, onlyIfExists: physical.HasOutputPipe, isHandshake: isHandshake)) { return new ValueTask(WriteResult.Success); // queued counts as success From b7c3dffdafc84f567f0e0ff98a7a7da6a2ef55b8 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 13 Dec 2021 12:00:06 -0500 Subject: [PATCH 022/117] Fix TieBreaker queue - this was going into a never-started backlog This fixes the immediate issue but we need to be really sure we never queue anything awaited in the middle of nowhere into a backlog we never start. Now that I understand the issue, we can probably harden this a bit more. 
--- src/StackExchange.Redis/ConnectionMultiplexer.cs | 2 +- src/StackExchange.Redis/PhysicalBridge.cs | 2 +- src/StackExchange.Redis/ServerEndPoint.cs | 6 +++--- tests/StackExchange.Redis.Tests/BacklogTests.cs | 9 +++++++-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index 9db4098bc..7ea838f36 100644 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -1803,7 +1803,7 @@ internal async Task ReconfigureAsync(bool first, bool reconfigureAll, LogP Message msg = Message.Create(0, flags, RedisCommand.GET, tieBreakerKey); msg.SetInternalCall(); msg = LoggingMessage.Create(log, msg); - tieBreakers[i] = server.WriteDirectAsync(msg, ResultProcessor.String); + tieBreakers[i] = server.WriteDirectAsync(msg, ResultProcessor.String, isHandshake: true); } } diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 24e9fe0fe..f7fc451e2 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -203,7 +203,7 @@ public WriteResult TryWriteSync(Message message, bool isReplica) public ValueTask TryWriteAsync(Message message, bool isReplica, bool isHandshake = false) { if (isDisposed) throw new ObjectDisposedException(Name); - if (!IsConnected) return new ValueTask(QueueOrFailMessage(message)); + if (!IsConnected && !isHandshake) return new ValueTask(QueueOrFailMessage(message)); var physical = this.physical; if (physical == null) diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index 959e576cd..664179e99 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -140,7 +140,7 @@ internal PhysicalBridge.State ConnectionState get { var tmp = interactive; - return tmp.ConnectionState; + return tmp?.ConnectionState ?? 
State.Disconnected; } } @@ -678,7 +678,7 @@ internal void OnHeartbeat() } } - internal Task WriteDirectAsync(Message message, ResultProcessor processor, object asyncState = null, PhysicalBridge bridge = null) + internal Task WriteDirectAsync(Message message, ResultProcessor processor, object asyncState = null, PhysicalBridge bridge = null, bool isHandshake = false) { static async Task Awaited(ServerEndPoint @this, Message message, ValueTask write, TaskCompletionSource tcs) { @@ -702,7 +702,7 @@ static async Task Awaited(ServerEndPoint @this, Message message, ValueTask Date: Mon, 13 Dec 2021 12:45:20 -0500 Subject: [PATCH 023/117] Yeah....that'd be bad --- src/StackExchange.Redis/BacklogPolicy.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/StackExchange.Redis/BacklogPolicy.cs b/src/StackExchange.Redis/BacklogPolicy.cs index b8111944d..7666a90de 100644 --- a/src/StackExchange.Redis/BacklogPolicy.cs +++ b/src/StackExchange.Redis/BacklogPolicy.cs @@ -12,7 +12,7 @@ public class BacklogPolicy /// Backlog behavior matching StackExchange.Redis's 2.x line, failing fast and not attempting to queue /// and retry when a connection is available again. /// - public static BacklogPolicy FailFast = new() + public static BacklogPolicy FailFast { get; } = new() { QueueWhileDisconnected = false, AbortPendingOnConnectionFailure = true, @@ -22,7 +22,7 @@ public class BacklogPolicy /// Default backlog policy which will allow commands to be issues against an endpoint and queue up. /// Commands are still subject to their async timeout (which serves as a queue size check). 
/// - public static BacklogPolicy Default = new() + public static BacklogPolicy Default { get; } = new() { QueueWhileDisconnected = true, AbortPendingOnConnectionFailure = false, From fc8a100783378dd1047213bf5d785cce937b026e Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 14 Dec 2021 12:16:59 -0500 Subject: [PATCH 024/117] Tiebreakers: move into the handshake Currently the way we handshake is to get everything configured, wait for a tracer to complete, and then issue the tiebreakers to all servers if they are in play. This complicates a few things with respect to timings, duplication, and write paths being a one-off for tie breakers, which I tripped on hard in #1912. In this, we instead move the tie breaker fetch as part of AutoConfigure as a fire-and-forget-process-the-result-later setup with a dedicated processor. This all happens before the tracer fires moving us to the next connection phase (added comments) so we should be safe. It should reduce both complexity and overall connection times proportional to endpoint latency (since we wait for completion right now). What needs adding here is tests with us disabling commands like INFO, GET, etc. and ensuring things still behave as we want. In the overall, the tie breaker is slightly less isolated but _should_ be happening in the same order and with the same exception if any - no net result change is intended there with respect to how we do or don't error along the way. But we never want a connection to fail _because of a tiebreaker_ and I think that warrants a few tests: - [ ] Disable `INFO` and see if we can connect - [ ] Disable `GET` and see if we can connect - [ ] Store some invalid TieBreaker and see if we can connect (e.g. make it a hash instead of a string) ...and maybe others? 
--- .../ConnectionMultiplexer.cs | 71 +++++-------------- src/StackExchange.Redis/ResultProcessor.cs | 30 ++++++++ src/StackExchange.Redis/ServerEndPoint.cs | 15 ++++ 3 files changed, 61 insertions(+), 55 deletions(-) diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index 85803df2c..bb33a3f66 100644 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -1719,20 +1719,16 @@ internal async Task ReconfigureAsync(bool first, bool reconfigureAll, LogP } int standaloneCount = 0, clusterCount = 0, sentinelCount = 0; var endpoints = RawConfig.EndPoints; - log?.WriteLine($"{endpoints.Count} unique nodes specified"); + bool useTieBreakers = !string.IsNullOrWhiteSpace(RawConfig.TieBreaker); + log?.WriteLine($"{endpoints.Count} unique nodes specified ({(useTieBreakers ? "with" : "without")} tiebreaker)"); if (endpoints.Count == 0) { throw new InvalidOperationException("No nodes to consider"); } -#pragma warning disable CS0618 - const CommandFlags flags = CommandFlags.NoRedirect | CommandFlags.HighPriority; -#pragma warning restore CS0618 List masters = new List(endpoints.Count); - bool useTieBreakers = !string.IsNullOrWhiteSpace(RawConfig.TieBreaker); ServerEndPoint[] servers = null; - Task[] tieBreakers = null; bool encounteredConnectedClusterServer = false; Stopwatch watch = null; @@ -1748,7 +1744,6 @@ internal async Task ReconfigureAsync(bool first, bool reconfigureAll, LogP if (endpoints == null) break; var available = new Task[endpoints.Count]; - tieBreakers = useTieBreakers ? new Task[endpoints.Count] : null; servers = new ServerEndPoint[available.Length]; RedisKey tieBreakerKey = useTieBreakers ? 
(RedisKey)RawConfig.TieBreaker : default(RedisKey); @@ -1791,22 +1786,6 @@ internal async Task ReconfigureAsync(bool first, bool reconfigureAll, LogP log?.WriteLine($"{Format.ToString(server.EndPoint)}: Endpoint is {server.ConnectionState}"); } - // After we've successfully connected (and authenticated), kickoff tie breakers if needed - if (useTieBreakers) - { - log?.WriteLine($"Election: Gathering tie-breakers..."); - for (int i = 0; i < available.Length; i++) - { - var server = servers[i]; - - log?.WriteLine($"{Format.ToString(server.EndPoint)}: Requesting tie-break (Key=\"{RawConfig.TieBreaker}\")..."); - Message msg = Message.Create(0, flags, RedisCommand.GET, tieBreakerKey); - msg.SetInternalCall(); - msg = LoggingMessage.Create(log, msg); - tieBreakers[i] = server.WriteDirectAsync(msg, ResultProcessor.String); - } - } - EndPointCollection updatedClusterEndpointCollection = null; for (int i = 0; i < available.Length; i++) { @@ -1920,7 +1899,7 @@ internal async Task ReconfigureAsync(bool first, bool reconfigureAll, LogP ServerSelectionStrategy.ServerType = ServerType.Standalone; } - var preferred = await NominatePreferredMaster(log, servers, useTieBreakers, tieBreakers, masters, timeoutMs: RawConfig.ConnectTimeout - checked((int)watch.ElapsedMilliseconds)).ObserveErrors().ForAwait(); + var preferred = NominatePreferredMaster(log, servers, useTieBreakers, masters); foreach (var master in masters) { if (master == preferred || master.IsReplica) @@ -2050,44 +2029,26 @@ private void ResetAllNonConnected() [System.Diagnostics.CodeAnalysis.SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Partial - may use instance data")] partial void OnTraceLog(LogProxy log, [CallerMemberName] string caller = null); - private static async Task NominatePreferredMaster(LogProxy log, ServerEndPoint[] servers, bool useTieBreakers, Task[] tieBreakers, List masters, int timeoutMs) + private static ServerEndPoint NominatePreferredMaster(LogProxy log, 
ServerEndPoint[] servers, bool useTieBreakers, List masters) { Dictionary uniques = null; if (useTieBreakers) { // count the votes uniques = new Dictionary(StringComparer.OrdinalIgnoreCase); - log?.WriteLine("Waiting for tiebreakers..."); - await WaitAllIgnoreErrorsAsync("tiebreaker", tieBreakers, Math.Max(timeoutMs, 200), log).ForAwait(); - for (int i = 0; i < tieBreakers.Length; i++) + for (int i = 0; i < servers.Length; i++) { - var ep = servers[i].EndPoint; - var status = tieBreakers[i].Status; - switch (status) + var server = servers[i]; + string serverResult = server.TieBreakerResult; + + if (string.IsNullOrWhiteSpace(serverResult)) { - case TaskStatus.RanToCompletion: - string s = tieBreakers[i].Result; - if (string.IsNullOrWhiteSpace(s)) - { - log?.WriteLine($"Election: {Format.ToString(ep)} had no tiebreaker set"); - } - else - { - log?.WriteLine($"Election: {Format.ToString(ep)} nominates: {s}"); - if (!uniques.TryGetValue(s, out int count)) count = 0; - uniques[s] = count + 1; - } - break; - case TaskStatus.Faulted: - log?.WriteLine($"Election: {Format.ToString(ep)} failed to nominate ({status})"); - foreach (var ex in tieBreakers[i].Exception.InnerExceptions) - { - if (ex.Message.StartsWith("MOVED ") || ex.Message.StartsWith("ASK ")) continue; - log?.WriteLine("> " + ex.Message); - } - break; - default: - log?.WriteLine($"Election: {Format.ToString(ep)} failed to nominate ({status})"); - break; + log?.WriteLine($"Election: {Format.ToString(server)} had no tiebreaker set"); + } + else + { + log?.WriteLine($"Election: {Format.ToString(server)} nominates: {serverResult}"); + if (!uniques.TryGetValue(serverResult, out int count)) count = 0; + uniques[serverResult] = count + 1; } } } diff --git a/src/StackExchange.Redis/ResultProcessor.cs b/src/StackExchange.Redis/ResultProcessor.cs index dd25cfc8d..ec1e78e1f 100644 --- a/src/StackExchange.Redis/ResultProcessor.cs +++ b/src/StackExchange.Redis/ResultProcessor.cs @@ -121,6 +121,7 @@ public static readonly 
StreamPendingMessagesProcessor public static readonly ResultProcessor String = new StringProcessor(), + TieBreaker = new TieBreakerProcessor(), ClusterNodesRaw = new ClusterNodesRawProcessor(); #region Sentinel @@ -2068,6 +2069,34 @@ protected override bool SetResultCore(PhysicalConnection connection, Message mes } } + private sealed class TieBreakerProcessor : ResultProcessor + { + protected override bool SetResultCore(PhysicalConnection connection, Message message, in RawResult result) + { + switch (result.Type) + { + case ResultType.SimpleString: + case ResultType.BulkString: + var tieBreaker = result.GetString(); + SetResult(message, tieBreaker); + + var bridge = connection.BridgeCouldBeNull; + try + { + var endpoint = bridge?.ServerEndPoint; + if (endpoint != null) + { + endpoint.TieBreakerResult = tieBreaker; + } + } + catch { } + + return true; + } + return false; + } + } + private class TracerProcessor : ResultProcessor { private readonly bool establishConnection; @@ -2146,6 +2175,7 @@ protected override bool SetResultCore(PhysicalConnection connection, Message mes { if (establishConnection) { + // This is what ultimately brings us to complete a connection, by advancing the state forward from a successful tracer after connection. 
connection.BridgeCouldBeNull?.OnFullyEstablished(connection, $"From command: {message.Command}"); } SetResult(message, happy); diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index a76f5ca3e..578756f30 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -375,6 +375,16 @@ internal async Task AutoConfigureAsync(PhysicalConnection connection, LogProxy l msg.SetInternalCall(); await WriteDirectOrQueueFireAndForgetAsync(connection, msg, ResultProcessor.ClusterNodes).ForAwait(); } + // If we are ging to fetch a tie breaker, do so last and we'll get it in before the tracer fires completing the connection + if (!string.IsNullOrEmpty(Multiplexer.RawConfig.TieBreaker)) + { + RedisKey tieBreakerKey = Multiplexer.RawConfig.TieBreaker; + log?.WriteLine($"{Format.ToString(EndPoint)}: Requesting tie-break (Key=\"{tieBreakerKey}\")..."); + msg = Message.Create(0, flags, RedisCommand.GET, tieBreakerKey); + msg.SetInternalCall(); + msg = LoggingMessage.Create(log, msg); + await WriteDirectOrQueueFireAndForgetAsync(connection, msg, ResultProcessor.TieBreaker).ForAwait(); + } } private int _nextReplicaOffset; @@ -608,6 +618,11 @@ public EndPoint MasterEndPoint set { SetConfig(ref masterEndPoint, value); } } + /// + /// Result of the latest tie breaker (from the last reconfigure). 
+ /// + internal string TieBreakerResult { get; set; } + internal bool CheckInfoReplication() { lastInfoReplicationCheckTicks = Environment.TickCount; From 7b98bc240fcc01f2965eb7f45a404712bdc9bb53 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 14 Dec 2021 12:40:24 -0500 Subject: [PATCH 025/117] Remove handshake changes --- src/StackExchange.Redis/ServerEndPoint.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index aae898221..54290743c 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -693,7 +693,7 @@ internal void OnHeartbeat() } } - internal Task WriteDirectAsync(Message message, ResultProcessor processor, object asyncState = null, PhysicalBridge bridge = null, bool isHandshake = false) + internal Task WriteDirectAsync(Message message, ResultProcessor processor, object asyncState = null, PhysicalBridge bridge = null) { static async Task Awaited(ServerEndPoint @this, Message message, ValueTask write, TaskCompletionSource tcs) { @@ -717,7 +717,7 @@ static async Task Awaited(ServerEndPoint @this, Message message, ValueTask Date: Tue, 14 Dec 2021 12:45:11 -0500 Subject: [PATCH 026/117] Rename isHandshake to bypassBacklog The more I look at this, the more I think a message flag is more appropriate. cc @mgravell thoughts on paralleling this to IsInternalCall and eliminating a lot of the changes here? 
--- src/StackExchange.Redis/PhysicalBridge.cs | 20 ++++++++++---------- src/StackExchange.Redis/ServerEndPoint.cs | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index f7fc451e2..2e0fb8699 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -200,16 +200,16 @@ public WriteResult TryWriteSync(Message message, bool isReplica) return result; } - public ValueTask TryWriteAsync(Message message, bool isReplica, bool isHandshake = false) + public ValueTask TryWriteAsync(Message message, bool isReplica, bool bypassBacklog = false) { if (isDisposed) throw new ObjectDisposedException(Name); - if (!IsConnected && !isHandshake) return new ValueTask(QueueOrFailMessage(message)); + if (!IsConnected && !bypassBacklog) return new ValueTask(QueueOrFailMessage(message)); var physical = this.physical; if (physical == null) { // If we're not connected yet and supposed to, queue it up - if (!isHandshake && Multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) + if (!bypassBacklog && Multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) { if (TryPushToBacklog(message, onlyIfExists: false)) { @@ -220,7 +220,7 @@ public ValueTask TryWriteAsync(Message message, bool isReplica, boo return new ValueTask(FailDueToNoConnection(message)); } - var result = WriteMessageTakingWriteLockAsync(physical, message, isHandshake); + var result = WriteMessageTakingWriteLockAsync(physical, message, bypassBacklog: bypassBacklog); LogNonPreferred(message.Flags, isReplica); return result; } @@ -812,7 +812,7 @@ internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool TryPushToBacklog(Message message, bool onlyIfExists, bool isHandshake = false) + private bool TryPushToBacklog(Message message, bool onlyIfExists, bool bypassBacklog = false) { // In 
the handshake case: send the command directly through. // If we're disconnected *in the middle of a handshake*, we've bombed a brand new socket and failing, @@ -820,7 +820,7 @@ private bool TryPushToBacklog(Message message, bool onlyIfExists, bool isHandsha // // Internal calls also shouldn't queue - try immediately. If these aren't errors (most aren't), we // won't alert the user. - if (isHandshake || message.IsInternalCall) + if (bypassBacklog || message.IsInternalCall) { return false; } @@ -1086,8 +1086,8 @@ private WriteResult TimedOutBeforeWrite(Message message) /// /// The physical connection to write to. /// The message to be written. - /// Whether this message is part of the handshake process. - internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnection physical, Message message, bool isHandshake = false) + /// Whether this message should bypass the backlog, going straight to the pipe or failing. + internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnection physical, Message message, bool bypassBacklog = false) { /* design decision/choice; the code works fine either way, but if this is * set to *true*, then when we can't take the writer-lock *right away*, @@ -1107,7 +1107,7 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect // AVOID REORDERING MESSAGES // Prefer to add it to the backlog if this thread can see that there might already be a message backlog. 
// We do this before attempting to take the write lock, because we won't actually write, we'll just let the backlog get processed in due course - if (TryPushToBacklog(message, onlyIfExists: physical.HasOutputPipe, isHandshake: isHandshake)) + if (TryPushToBacklog(message, onlyIfExists: physical.HasOutputPipe, bypassBacklog: bypassBacklog)) { return new ValueTask(WriteResult.Success); // queued counts as success } @@ -1125,7 +1125,7 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect { // we can't get it *instantaneously*; is there // perhaps a backlog and active backlog processor? - if (TryPushToBacklog(message, onlyIfExists: !ALWAYS_USE_BACKLOG_IF_CANNOT_GET_SYNC_LOCK, isHandshake: isHandshake)) + if (TryPushToBacklog(message, onlyIfExists: !ALWAYS_USE_BACKLOG_IF_CANNOT_GET_SYNC_LOCK, bypassBacklog: bypassBacklog)) return new ValueTask(WriteResult.Success); // queued counts as success // no backlog... try to wait with the timeout; diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index 54290743c..00fea3b57 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -804,7 +804,7 @@ internal ValueTask WriteDirectOrQueueFireAndForgetAsync(PhysicalConnection co if (connection == null) { Multiplexer.Trace($"{Format.ToString(this)}: Enqueue (async): " + message); - result = GetBridge(message.Command).TryWriteAsync(message, isReplica, isHandshake: true); + result = GetBridge(message.Command).TryWriteAsync(message, isReplica, bypassBacklog: true); } else { @@ -816,7 +816,7 @@ internal ValueTask WriteDirectOrQueueFireAndForgetAsync(PhysicalConnection co } else { - result = bridge.WriteMessageTakingWriteLockAsync(connection, message, isHandshake: true); + result = bridge.WriteMessageTakingWriteLockAsync(connection, message, bypassBacklog: true); } } From e830cb0f53a26cbd1352fd50d486f636a0a25de1 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 14 Dec 2021 
12:50:28 -0500 Subject: [PATCH 027/117] Add release notes --- docs/ReleaseNotes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md index 424905b89..071147b81 100644 --- a/docs/ReleaseNotes.md +++ b/docs/ReleaseNotes.md @@ -4,6 +4,7 @@ - Makes `StreamEntry` constructor public for better unit test experience (#1923 via WeihanLi) - Fix integer overflow error (issue #1926) with 2GiB+ result payloads +- Moved tiebreaker fetching in connections into the handshake phase (streamline + simplification) (#1931 via NickCraver) ## 2.2.88 From a79c89d15971714191eb93349d71f2b048a7b8bb Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Wed, 15 Dec 2021 07:35:15 -0500 Subject: [PATCH 028/117] PR fixes! --- src/StackExchange.Redis/ResultProcessor.cs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/StackExchange.Redis/ResultProcessor.cs b/src/StackExchange.Redis/ResultProcessor.cs index ec1e78e1f..02e221836 100644 --- a/src/StackExchange.Redis/ResultProcessor.cs +++ b/src/StackExchange.Redis/ResultProcessor.cs @@ -2080,11 +2080,9 @@ protected override bool SetResultCore(PhysicalConnection connection, Message mes var tieBreaker = result.GetString(); SetResult(message, tieBreaker); - var bridge = connection.BridgeCouldBeNull; try { - var endpoint = bridge?.ServerEndPoint; - if (endpoint != null) + if (connection.BridgeCouldBeNull?.ServerEndPoint is ServerEndPoint endpoint) { endpoint.TieBreakerResult = tieBreaker; } From fa5200c2d65c3eb19979db21b16cba91d2e95668 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 28 Dec 2021 12:13:01 -0500 Subject: [PATCH 029/117] Tiebreaker: add tests Annnnnnd this is why we add tests, we would have tried to issue the GET and never connected in previous code, bad Craver, bad! 
--- src/StackExchange.Redis/ServerEndPoint.cs | 5 +- .../ConnectCustomConfig.cs | 58 +++++++++++++++++++ 2 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 tests/StackExchange.Redis.Tests/ConnectCustomConfig.cs diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index 578756f30..92d407600 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -375,8 +375,9 @@ internal async Task AutoConfigureAsync(PhysicalConnection connection, LogProxy l msg.SetInternalCall(); await WriteDirectOrQueueFireAndForgetAsync(connection, msg, ResultProcessor.ClusterNodes).ForAwait(); } - // If we are ging to fetch a tie breaker, do so last and we'll get it in before the tracer fires completing the connection - if (!string.IsNullOrEmpty(Multiplexer.RawConfig.TieBreaker)) + // If we are going to fetch a tie breaker, do so last and we'll get it in before the tracer fires completing the connection + // But if GETs are disabled on this, do not fail the connection - we just don't get tiebreaker benefits + if (!string.IsNullOrEmpty(Multiplexer.RawConfig.TieBreaker) && Multiplexer.RawConfig.CommandMap.IsAvailable(RedisCommand.GET)) { RedisKey tieBreakerKey = Multiplexer.RawConfig.TieBreaker; log?.WriteLine($"{Format.ToString(EndPoint)}: Requesting tie-break (Key=\"{tieBreakerKey}\")..."); diff --git a/tests/StackExchange.Redis.Tests/ConnectCustomConfig.cs b/tests/StackExchange.Redis.Tests/ConnectCustomConfig.cs new file mode 100644 index 000000000..a172974c8 --- /dev/null +++ b/tests/StackExchange.Redis.Tests/ConnectCustomConfig.cs @@ -0,0 +1,58 @@ +using Xunit; +using Xunit.Abstractions; + +namespace StackExchange.Redis.Tests +{ + public class ConnectCustomConfig : TestBase + { + public ConnectCustomConfig(ITestOutputHelper output) : base (output) { } + + // So we're triggering tiebreakers here + protected override string GetConfiguration() => 
TestConfig.Current.MasterServerAndPort + "," + TestConfig.Current.ReplicaServerAndPort; + + [Theory] + [InlineData("config")] + [InlineData("info")] + [InlineData("get")] + [InlineData("config,get")] + [InlineData("info,get")] + [InlineData("config,info,get")] + public void DisabledCommandsStillConnect(string disabledCommands) + { + using var muxer = Create(allowAdmin: true, disabledCommands: disabledCommands.Split(','), log: Writer); + + var db = muxer.GetDatabase(); + db.Ping(); + Assert.True(db.IsConnected(default(RedisKey))); + } + + [Fact] + public void TieBreakerIntact() + { + using var muxer = Create(allowAdmin: true, log: Writer) as ConnectionMultiplexer; + + var tiebreaker = muxer.GetDatabase().StringGet(muxer.RawConfig.TieBreaker); + Log($"Tiebreaker: {tiebreaker}"); + + var snapshot = muxer.GetServerSnapshot(); + foreach (var server in snapshot) + { + Assert.Equal(tiebreaker, server.TieBreakerResult); + } + } + + [Fact] + public void TieBreakerSkips() + { + using var muxer = Create(allowAdmin: true, disabledCommands: new[] { "get" }, log: Writer) as ConnectionMultiplexer; + Assert.Throws(() => muxer.GetDatabase().StringGet(muxer.RawConfig.TieBreaker)); + + var snapshot = muxer.GetServerSnapshot(); + foreach (var server in snapshot) + { + Assert.True(server.IsConnected); + Assert.Null(server.TieBreakerResult); + } + } + } +} From fb6a1351a68bf9223aed7375f98d7c83eaf9be8a Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 28 Dec 2021 12:20:33 -0500 Subject: [PATCH 030/117] Add incorrect tiebreaker type test --- .../ConnectCustomConfig.cs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/StackExchange.Redis.Tests/ConnectCustomConfig.cs b/tests/StackExchange.Redis.Tests/ConnectCustomConfig.cs index a172974c8..2bed55caa 100644 --- a/tests/StackExchange.Redis.Tests/ConnectCustomConfig.cs +++ b/tests/StackExchange.Redis.Tests/ConnectCustomConfig.cs @@ -54,5 +54,24 @@ public void TieBreakerSkips() 
Assert.Null(server.TieBreakerResult); } } + + [Fact] + public void TiebreakerIncorrectType() + { + var tiebreakerKey = Me(); + using var fubarMuxer = Create(allowAdmin: true, log: Writer); + // Store something nonsensical in the tiebreaker key: + fubarMuxer.GetDatabase().HashSet(tiebreakerKey, "foo", "bar"); + + // Ensure the next connection getting an invalid type still connects + using var muxer = Create(allowAdmin: true, tieBreaker: tiebreakerKey, log: Writer); + + var db = muxer.GetDatabase(); + db.Ping(); + Assert.True(db.IsConnected(default(RedisKey))); + + var ex = Assert.Throws(() => db.StringGet(tiebreakerKey)); + Assert.Contains("WRONGTYPE", ex.Message); + } } } From 7fccba9b48d2c072789aded456d42c398a8180df Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 4 Jan 2022 09:15:05 -0500 Subject: [PATCH 031/117] Fix ServerTakesPrecendenceOverSnapshot behavior We expect this to recover by default - the test needs to explicitly opt into fail fast to avoid the race. --- tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs b/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs index a22e3a133..0bf7e670c 100644 --- a/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs +++ b/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs @@ -63,7 +63,7 @@ public void ServerTakesPrecendenceOverSnapshot() { try { - using (var muxer = Create(keepAlive: 1, connectTimeout: 10000, allowAdmin: true, shared: false)) + using (var muxer = Create(keepAlive: 1, connectTimeout: 10000, allowAdmin: true, shared: false, backlogPolicy: BacklogPolicy.FailFast)) { muxer.GetDatabase(); muxer.AllowConnect = false; From 410bf80e637e6f047157559bbce03f513ed485f4 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 4 Jan 2022 12:44:20 -0500 Subject: [PATCH 032/117] Pull in the Area Fix Pulls in the https://github.com/mgravell/Pipelines.Sockets.Unofficial/pull/63 set of
changes --- Directory.Build.targets | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Directory.Build.targets b/Directory.Build.targets index ac2529fe4..b52c8705b 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -13,7 +13,7 @@ - + @@ -25,6 +25,6 @@ - + \ No newline at end of file From b364d47bb6a5bb7fd20f441209b0c40d04903691 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 4 Jan 2022 12:44:52 -0500 Subject: [PATCH 033/117] Fix naming and remove unused asyncState Still trying to reason if we could/should combine paths here but subtle differences...eliminating the fluff. --- src/StackExchange.Redis/ServerEndPoint.cs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index 4d585be0d..f602c6c35 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -92,7 +92,7 @@ async Task IfConnectedAsync(LogProxy log, bool sendTracerIfConnected, bo } if (sendTracerIfConnected) { - await SendTracer(log).ForAwait(); + await SendTracerAsync(log).ForAwait(); } log?.WriteLine($"{Format.ToString(this)}: OnConnectedAsync already connected end"); return "Already connected"; @@ -694,7 +694,7 @@ internal void OnHeartbeat() } } - internal Task WriteDirectAsync(Message message, ResultProcessor processor, object asyncState = null, PhysicalBridge bridge = null) + internal Task WriteDirectAsync(Message message, ResultProcessor processor, PhysicalBridge bridge = null) { static async Task Awaited(ServerEndPoint @this, Message message, ValueTask write, TaskCompletionSource tcs) { @@ -707,7 +707,7 @@ static async Task Awaited(ServerEndPoint @this, Message message, ValueTask.Create(out var tcs, asyncState); + var source = TaskResultBox.Create(out var tcs, null); message.SetSource(processor, source); if (bridge == null) bridge = GetBridge(message.Command); @@ -751,7 +751,7 @@ internal void 
ReportNextFailure() subscription?.ReportNextFailure(); } - internal Task SendTracer(LogProxy log = null) + internal Task SendTracerAsync(LogProxy log = null) { var msg = GetTracerMessage(false); msg = LoggingMessage.Create(log, msg); @@ -905,6 +905,7 @@ private async Task HandshakeAsync(PhysicalConnection connection, LogProxy log) if (configChannel != null) { msg = Message.Create(-1, CommandFlags.FireAndForget, RedisCommand.SUBSCRIBE, (RedisChannel)configChannel); + msg.SetInternalCall(); await WriteDirectOrQueueFireAndForgetAsync(connection, msg, ResultProcessor.TrackSubscriptions).ForAwait(); } } From b280c874f46f24432035771db7f002c8c02fca1c Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 4 Jan 2022 12:45:17 -0500 Subject: [PATCH 034/117] Issue922_ReconnectRaised: Add logging This can be flaky locally rarely - add the connection logging. --- tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs index 8b80fc56f..bace5b19d 100644 --- a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs +++ b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs @@ -106,7 +106,7 @@ public async Task Issue922_ReconnectRaised() int failCount = 0, restoreCount = 0; - using (var muxer = ConnectionMultiplexer.Connect(config)) + using (var muxer = ConnectionMultiplexer.Connect(config, log: Writer)) { muxer.ConnectionFailed += delegate { Interlocked.Increment(ref failCount); }; muxer.ConnectionRestored += delegate { Interlocked.Increment(ref restoreCount); }; From 7b423667d0a57034818936a8aaa89e606d32088d Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 4 Jan 2022 14:52:54 -0500 Subject: [PATCH 035/117] Backlog: handle timeouts better These are 2 big changes here. 1: EVERYTHING timeouts out in the backlog queue, all of it (so no need for flags anymore - everything goes. 
2: The backlog purge was still happening like lemmings off a cliff in the race case of a backlog run triggered by heartbeat or queue during the disconnected period. A command that hadn't timed out was immediately retried...off a cliff. This instead waits for being connected before we exercise that, and eliminates the timeout duplication (we already dequeue the message in the backlog loop, where we need a peek approach for timeouts). A minor change is adding `TotalBacklogMessagesQueued` for debugging here. We still need some sync tests added, but getting there! In general this simplifies things to: 1. Always timeout anything in the backlog 2. Always check timeouts in the backlog (via 1 code path now) 3. If we're connected, try anything that wasn't timed out - if we're not connected eject and let the next heartbeat or queue kick us off. The flakiness locally in Issue922_ReconnectRaised seems unrelated, but adding more logging to find out what that's about. --- src/StackExchange.Redis/Enums/CommandFlags.cs | 2 - src/StackExchange.Redis/Message.cs | 37 +++---- src/StackExchange.Redis/PhysicalBridge.cs | 102 +++++++++--------- src/StackExchange.Redis/PhysicalConnection.cs | 5 +- .../StackExchange.Redis.Tests/BacklogTests.cs | 21 +++- .../ConnectingFailDetection.cs | 12 ++- 6 files changed, 95 insertions(+), 84 deletions(-) diff --git a/src/StackExchange.Redis/Enums/CommandFlags.cs b/src/StackExchange.Redis/Enums/CommandFlags.cs index 286e19cd6..4578c4403 100644 --- a/src/StackExchange.Redis/Enums/CommandFlags.cs +++ b/src/StackExchange.Redis/Enums/CommandFlags.cs @@ -82,7 +82,5 @@ public enum CommandFlags /// Indicates that script-related operations should use EVAL, not SCRIPT LOAD + EVALSHA /// NoScriptCache = 512, - - // 1024: used for timed-out; never user-specified, so not visible on the public API } } diff --git a/src/StackExchange.Redis/Message.cs b/src/StackExchange.Redis/Message.cs index c8fdf54f8..44004edbe 100644 --- a/src/StackExchange.Redis/Message.cs +++ 
b/src/StackExchange.Redis/Message.cs @@ -73,8 +73,7 @@ internal void SetBacklogState(int position, PhysicalConnection physical) protected RedisCommand command; private const CommandFlags AskingFlag = (CommandFlags)32, - ScriptUnavailableFlag = (CommandFlags)256, - NeedsAsyncTimeoutCheckFlag = (CommandFlags)1024; + ScriptUnavailableFlag = (CommandFlags)256; private const CommandFlags MaskMasterServerPreference = CommandFlags.DemandMaster | CommandFlags.DemandReplica @@ -697,29 +696,22 @@ internal void SetRequestSent() [MethodImpl(MethodImplOptions.AggressiveInlining)] internal void SetWriteTime() { - if ((Flags & NeedsAsyncTimeoutCheckFlag) != 0) - { - _writeTickCount = Environment.TickCount; // note this might be reset if we resend a message, cluster-moved etc; I'm OK with that - } + _writeTickCount = Environment.TickCount; // note this might be reset if we resend a message, cluster-moved etc; I'm OK with that } private int _writeTickCount; public int GetWriteTime() => Volatile.Read(ref _writeTickCount); - private void SetNeedsTimeoutCheck() => Flags |= NeedsAsyncTimeoutCheckFlag; - internal bool HasAsyncTimedOut(int now, int timeoutMilliseconds, out int millisecondsTaken) + /// + /// Checks if this message has violated the provided timeout. + /// Whether it's a sync operation in a .Wait() or in the backlog queue or written/pending asynchronously, we need to timeout everything. + /// ...or we get indefinite Task hangs for completions. 
+ /// + internal bool HasTimedOut(int now, int timeoutMilliseconds, out int millisecondsTaken) { - if ((Flags & NeedsAsyncTimeoutCheckFlag) != 0) - { - millisecondsTaken = unchecked(now - _writeTickCount); // note: we can't just check "if sent < cutoff" because of wrap-aro - if (millisecondsTaken >= timeoutMilliseconds) - { - Flags &= ~NeedsAsyncTimeoutCheckFlag; // note: we don't remove it from the queue - still might need to marry it up; but: it is toast - return true; - } - } - else + millisecondsTaken = unchecked(now - _writeTickCount); // note: we can't just check "if sent < cutoff" because of wrap-aro + if (millisecondsTaken >= timeoutMilliseconds) { - millisecondsTaken = default; + return true; } return false; } @@ -745,16 +737,17 @@ internal void SetPreferReplica() Flags = (Flags & ~MaskMasterServerPreference) | CommandFlags.PreferReplica; } + /// + /// Note order here reversed to prevent overload resolution errors + /// internal void SetSource(ResultProcessor resultProcessor, IResultBox resultBox) - { // note order here reversed to prevent overload resolution errors - if (resultBox != null && resultBox.IsAsync) SetNeedsTimeoutCheck(); + { this.resultBox = resultBox; this.resultProcessor = resultProcessor; } internal void SetSource(IResultBox resultBox, ResultProcessor resultProcessor) { - if (resultBox != null && resultBox.IsAsync) SetNeedsTimeoutCheck(); this.resultBox = resultBox; this.resultProcessor = resultProcessor; } diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 2e0fb8699..b3baf3f82 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -38,6 +38,7 @@ internal sealed class PhysicalBridge : IDisposable private readonly ConcurrentQueue _backlog = new(); private bool BacklogHasItems => !_backlog.IsEmpty; private int _backlogProcessorIsRunning = 0; + private long _backlogTotalEnqueued = 0; private int activeWriters = 0; private int beating; @@ 
-147,6 +148,7 @@ private WriteResult QueueOrFailMessage(Message message) message.SetEnqueued(null); message.SetBacklogState(_backlog.Count, null); _backlog.Enqueue(message); + Interlocked.Increment(ref _backlogTotalEnqueued); return WriteResult.Success; // we'll take it... } else if (Multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) @@ -154,6 +156,7 @@ private WriteResult QueueOrFailMessage(Message message) message.SetEnqueued(null); message.SetBacklogState(_backlog.Count, null); _backlog.Enqueue(message); + Interlocked.Increment(ref _backlogTotalEnqueued); return WriteResult.Success; // we'll queue for retry here... } else @@ -343,6 +346,11 @@ internal readonly struct BridgeStatus /// public BacklogStatus BacklogStatus { get; init; } + /// + /// The number of messages ever added to the backlog queue in the life of this connection. + /// + public long TotalBacklogMessagesQueued { get; init; } + /// /// Status for the underlying . /// @@ -360,6 +368,7 @@ internal readonly struct BridgeStatus IsWriterActive = !_singleWriterMutex.IsAvailable, BacklogMessagesPending = _backlog.Count, BacklogStatus = _backlogStatus, + TotalBacklogMessagesQueued = _backlogTotalEnqueued, Connection = physical?.GetStatus() ?? 
PhysicalConnection.ConnectionStatus.Default, }; @@ -549,12 +558,11 @@ internal void OnHeartbeat(bool ifConnectedOnly) { if (BacklogHasItems) { - CheckBacklogForTimeouts(); - // Ensure we're processing the backlog - if (BacklogHasItems) - { - StartBacklogProcessor(); - } + // If we have a backlog, kickoff the processing + // This will first timeout any messages that have sat too long and either: + // A: Abort if we're still not connected yet (we should be in this path) + // or B: Process the backlog and send those messages through the pipe + StartBacklogProcessor(); } runThisTime = !isDisposed && Interlocked.CompareExchange(ref beating, 1, 0) == 0; @@ -834,6 +842,7 @@ private bool TryPushToBacklog(Message message, bool onlyIfExists, bool bypassBac int count = _backlog.Count; message.SetBacklogState(count, physical); _backlog.Enqueue(message); + Interlocked.Increment(ref _backlogTotalEnqueued); // The correct way to decide to start backlog process is not based on previously empty // but based on a) not empty now (we enqueued!) and b) no backlog processor already running. @@ -874,7 +883,7 @@ private void StartBacklogProcessor() /// Crawls from the head of the backlog queue, consuming anything that should have timed out /// and pruning it accordingly (these messages will get timeout exceptions). /// - private void CheckBacklogForTimeouts() + private void CheckBacklogForTimeouts(ConcurrentQueue backlog) { var now = Environment.TickCount; var timeout = TimeoutMilliseconds; @@ -886,15 +895,15 @@ private void CheckBacklogForTimeouts() { // See if the message has pass our async timeout threshold // or has otherwise been completed (e.g. 
a sync wait timed out) which would have cleared the ResultBox - if (message.HasAsyncTimedOut(now, timeout, out var _) || message.ResultBox == null) break; // not a timeout - we can stop looking - lock (_backlog) + if (message.HasTimedOut(now, timeout, out var _) || message.ResultBox == null) break; // not a timeout - we can stop looking + lock (backlog) { // Peek again since we didn't have lock before... // and rerun the exact same checks as above, note that it may be a different message now - if (!_backlog.TryPeek(out message)) break; - if (!message.HasAsyncTimedOut(now, timeout, out var _) && message.ResultBox != null) break; + if (!backlog.TryPeek(out message)) break; + if (!message.HasTimedOut(now, timeout, out var _) && message.ResultBox != null) break; - if (!_backlog.TryDequeue(out var message2) || (message != message2)) // consume it for real + if (!backlog.TryDequeue(out var message2) || (message != message2)) // consume it for real { throw new RedisException("Thread safety bug detected! A queue message disappeared while we had the backlog lock"); } @@ -915,6 +924,7 @@ internal enum BacklogStatus : byte Started, CheckingForWork, CheckingForTimeout, + CheckingForTimeoutComplete, RecordingTimeout, WritingMessage, Flushing, @@ -983,7 +993,13 @@ private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog) #endif _backlogStatus = BacklogStatus.Starting; - while (true) + // First eliminate any messages that have timed out already. 
+ _backlogStatus = BacklogStatus.CheckingForTimeout; + CheckBacklogForTimeouts(backlog); + _backlogStatus = BacklogStatus.CheckingForTimeoutComplete; + + // For the rest of the backlog, if we're not connected there's no point - abort out + while (IsConnected) { // check whether the backlog is empty *before* even trying to get the lock if (backlog.IsEmpty) return; // nothing to do @@ -1001,56 +1017,40 @@ private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog) int acquiredTime = Environment.TickCount; var msToGetLock = unchecked(acquiredTime - tryToAcquireTime); #endif - - // so now we are the writer; write some things! - Message message; - var timeout = TimeoutMilliseconds; - while (true) + // Only execute if we're connected. + // Timeouts are handled above, so we're exclusively into backlog items eligible to write at this point. + // If we can't write them, abort and wait for the next heartbeat or activation to try this again. + while (IsConnected && physical?.HasOutputPipe == true) { + Message message; _backlogStatus = BacklogStatus.CheckingForWork; + // We need to lock _backlog when dequeuing because of - // races with timeout processing logic + // races with timeout processing logic (e.g. next heartbeat hitting us lock (backlog) { - if (!backlog.TryDequeue(out message)) break; // all done + // Note that we're actively taking it off the queue here, not peeking + // If there's nothing left in queue, we're done. 
+ if (!backlog.TryDequeue(out message)) break; } try { - _backlogStatus = BacklogStatus.CheckingForTimeout; - if (message.HasAsyncTimedOut(Environment.TickCount, timeout, out var _)) + _backlogStatus = BacklogStatus.WritingMessage; + var result = WriteMessageInsideLock(physical, message); + + if (result == WriteResult.Success) { - _backlogStatus = BacklogStatus.RecordingTimeout; - var ex = Multiplexer.GetException(WriteResult.TimeoutBeforeWrite, message, ServerEndPoint); -#if DEBUG // additional tracking - ex.Data["Redis-BacklogStartDelay"] = msToStartWorker; - ex.Data["Redis-BacklogGetLockDelay"] = msToGetLock; - if (failureCount != 0) ex.Data["Redis-BacklogFailCount"] = failureCount; - if (_maxWriteTime >= 0) ex.Data["Redis-MaxWrite"] = _maxWriteTime.ToString() + "ms, " + _maxWriteCommand.ToString(); - var maxFlush = physical?.MaxFlushTime ?? -1; - if (maxFlush >= 0) ex.Data["Redis-MaxFlush"] = maxFlush.ToString() + "ms, " + (physical?.MaxFlushBytes ?? -1).ToString(); - if (_maxLockDuration >= 0) ex.Data["Redis-MaxLockDuration"] = _maxLockDuration; -#endif - message.SetExceptionAndComplete(ex, this); + _backlogStatus = BacklogStatus.Flushing; + result = await physical.FlushAsync(false).ConfigureAwait(false); } - else if (physical?.HasOutputPipe == true) - { - _backlogStatus = BacklogStatus.WritingMessage; - var result = WriteMessageInsideLock(physical, message); - - if (result == WriteResult.Success) - { - _backlogStatus = BacklogStatus.Flushing; - result = await physical.FlushAsync(false).ConfigureAwait(false); - } - _backlogStatus = BacklogStatus.MarkingInactive; - if (result != WriteResult.Success) - { - _backlogStatus = BacklogStatus.RecordingWriteFailure; - var ex = Multiplexer.GetException(result, message, ServerEndPoint); - HandleWriteException(message, ex); - } + _backlogStatus = BacklogStatus.MarkingInactive; + if (result != WriteResult.Success) + { + _backlogStatus = BacklogStatus.RecordingWriteFailure; + var ex = Multiplexer.GetException(result, 
message, ServerEndPoint); + HandleWriteException(message, ex); } } catch (Exception ex) diff --git a/src/StackExchange.Redis/PhysicalConnection.cs b/src/StackExchange.Redis/PhysicalConnection.cs index 8588fcc54..3405e1cb0 100644 --- a/src/StackExchange.Redis/PhysicalConnection.cs +++ b/src/StackExchange.Redis/PhysicalConnection.cs @@ -633,7 +633,8 @@ internal void OnBridgeHeartbeat() var timeout = bridge.Multiplexer.AsyncTimeoutMilliseconds; foreach (var msg in _writtenAwaitingResponse) { - if (msg.HasAsyncTimedOut(now, timeout, out var elapsed)) + // We only handle async timeouts here, synchronous timeouts are handled upstream. + if (msg.ResultBoxIsAsync && msg.HasTimedOut(now, timeout, out var elapsed)) { bool haveDeltas = msg.TryGetPhysicalState(out _, out _, out long sentDelta, out var receivedDelta) && sentDelta >= 0 && receivedDelta >= 0; var timeoutEx = ExceptionFactory.Timeout(bridge.Multiplexer, haveDeltas @@ -643,7 +644,7 @@ internal void OnBridgeHeartbeat() msg.SetExceptionAndComplete(timeoutEx, bridge); // tell the message that it is doomed bridge.Multiplexer.OnAsyncTimeout(); } - // note: it is important that we **do not** remove the message unless we're tearing down the socket; that + // Note: it is important that we **do not** remove the message unless we're tearing down the socket; that // would disrupt the chain for MatchResult; we just pre-emptively abort the message from the caller's // perspective, and set a flag on the message so we don't keep doing it } diff --git a/tests/StackExchange.Redis.Tests/BacklogTests.cs b/tests/StackExchange.Redis.Tests/BacklogTests.cs index 7eb51510d..7cf470efd 100644 --- a/tests/StackExchange.Redis.Tests/BacklogTests.cs +++ b/tests/StackExchange.Redis.Tests/BacklogTests.cs @@ -126,6 +126,8 @@ public async Task QueuesAndFlushesAfterReconnecting() options.EndPoints.Add(TestConfig.Current.MasterServerAndPort); using var muxer = await ConnectionMultiplexer.ConnectAsync(options, Writer); + muxer.ErrorMessage += (s, e) 
=> Writer.WriteLine($"Error Message {e.EndPoint}: {e.Message}"); + muxer.InternalError += (s, e) => Writer.WriteLine($"Internal Error {e.EndPoint}: {e.Exception.Message}"); var db = muxer.GetDatabase(); Writer.WriteLine("Test: Initial (connected) ping"); @@ -143,8 +145,8 @@ public async Task QueuesAndFlushesAfterReconnecting() // Queue up some commands Writer.WriteLine("Test: Disconnected pings"); - _ = db.PingAsync(); - _ = db.PingAsync(); + var ignoredA = db.PingAsync(); + var ignoredB = db.PingAsync(); var lastPing = db.PingAsync(); // TODO: Add specific server call @@ -158,10 +160,19 @@ public async Task QueuesAndFlushesAfterReconnecting() Writer.WriteLine("Test: Awaiting reconnect"); await UntilCondition(TimeSpan.FromSeconds(3), () => muxer.IsConnected).ForAwait(); - Writer.WriteLine("Test: Awaiting ping1"); + Writer.WriteLine("Test: Checking reconnected 1"); + Assert.True(muxer.IsConnected); + + Writer.WriteLine("Test: ignoredA Status: " + ignoredA.Status); + Writer.WriteLine("Test: ignoredB Status: " + ignoredB.Status); + Writer.WriteLine("Test: lastPing Status: " + lastPing.Status); + var afterConnectedStats = server.GetBridgeStatus(RedisCommand.PING); + Writer.WriteLine($"Test: BacklogStatus: {afterConnectedStats.BacklogStatus}, BacklogMessagesPending: {afterConnectedStats.BacklogMessagesPending}, IsWriterActive: {afterConnectedStats.IsWriterActive}, MessagesSinceLastHeartbeat: {afterConnectedStats.MessagesSinceLastHeartbeat}, TotalBacklogMessagesQueued: {afterConnectedStats.TotalBacklogMessagesQueued}"); + + Writer.WriteLine("Test: Awaiting lastPing 1"); await lastPing; - Writer.WriteLine("Test: Checking reconnected"); + Writer.WriteLine("Test: Checking reconnected 2"); Assert.True(muxer.IsConnected); var reconnectedStats = server.GetBridgeStatus(RedisCommand.PING); Assert.Equal(0, reconnectedStats.BacklogMessagesPending); @@ -175,7 +186,7 @@ public async Task QueuesAndFlushesAfterReconnecting() // We should see none queued Writer.WriteLine("Test: 
BacklogMessagesPending check"); Assert.Equal(0, stats.BacklogMessagesPending); - Writer.WriteLine("Test: Awaiting lastPing"); + Writer.WriteLine("Test: Awaiting lastPing 2"); await lastPing; Writer.WriteLine("Test: Done"); } diff --git a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs index bace5b19d..5b63e747b 100644 --- a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs +++ b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs @@ -108,8 +108,16 @@ public async Task Issue922_ReconnectRaised() using (var muxer = ConnectionMultiplexer.Connect(config, log: Writer)) { - muxer.ConnectionFailed += delegate { Interlocked.Increment(ref failCount); }; - muxer.ConnectionRestored += delegate { Interlocked.Increment(ref restoreCount); }; + muxer.ConnectionFailed += (s, e) => + { + Interlocked.Increment(ref failCount); + Log($"Connection Failed ({e.ConnectionType},{e.FailureType}): {e.Exception}"); + }; + muxer.ConnectionRestored += (s, e) => + { + Interlocked.Increment(ref restoreCount); + Log($"Connection Failed ({e.ConnectionType},{e.FailureType}): {e.Exception}"); + }; muxer.GetDatabase(); Assert.Equal(0, Volatile.Read(ref failCount)); From 784d78af6ec1c41bea777f9783f522b5bb6ece11 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 4 Jan 2022 15:04:06 -0500 Subject: [PATCH 036/117] Revert subscribe -> internal Further changes obsoleted this change - helps re-subscribe after failure. 
--- src/StackExchange.Redis/ServerEndPoint.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index f602c6c35..72baa69f3 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -897,7 +897,7 @@ private async Task HandshakeAsync(PhysicalConnection connection, LogProxy log) log?.WriteLine($"{Format.ToString(this)}: Sending critical tracer (handshake): {tracer.CommandAndKey}"); await WriteDirectOrQueueFireAndForgetAsync(connection, tracer, ResultProcessor.EstablishConnection).ForAwait(); - // note: this **must** be the last thing on the subscription handshake, because after this + // Note: this **must** be the last thing on the subscription handshake, because after this // we will be in subscriber mode: regular commands cannot be sent if (connType == ConnectionType.Subscription) { @@ -905,7 +905,7 @@ private async Task HandshakeAsync(PhysicalConnection connection, LogProxy log) if (configChannel != null) { msg = Message.Create(-1, CommandFlags.FireAndForget, RedisCommand.SUBSCRIBE, (RedisChannel)configChannel); - msg.SetInternalCall(); + // Note: this is NOT internal, we want it to queue in a backlog for sending when ready if necessary await WriteDirectOrQueueFireAndForgetAsync(connection, msg, ResultProcessor.TrackSubscriptions).ForAwait(); } } From 582e5f16b666cddeb210476cdf946643f7ca5189 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 4 Jan 2022 15:05:01 -0500 Subject: [PATCH 037/117] Issue922: Fix assumptions This was sometimes killing subscription and sometimes not in time given the new simulation path. Instead, only kill what we expect there. 
--- tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs index 5b63e747b..0ecca0b63 100644 --- a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs +++ b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs @@ -124,7 +124,7 @@ public async Task Issue922_ReconnectRaised() Assert.Equal(0, Volatile.Read(ref restoreCount)); var server = muxer.GetServer(TestConfig.Current.MasterServerAndPort); - server.SimulateConnectionFailure(SimulatedFailureType.All); + server.SimulateConnectionFailure(SimulatedFailureType.InteractiveInbound | SimulatedFailureType.InteractiveOutbound); await UntilCondition(TimeSpan.FromSeconds(10), () => Volatile.Read(ref failCount) + Volatile.Read(ref restoreCount) == 4); // interactive+subscriber = 2 From b8d2636c28bfff245c290e92a3e22e9bea03cb15 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 4 Jan 2022 15:17:57 -0500 Subject: [PATCH 038/117] Increase PubSubGetAllCorrectOrder_OnMessage_Async gap --- tests/StackExchange.Redis.Tests/PubSub.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index e4d22798d..c0efedeb4 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -520,6 +520,9 @@ public async Task PubSubGetAllCorrectOrder_OnMessage_Async() }); await sub.PingAsync().ForAwait(); + // Give a delay between subscriptions and when we try to publish to be safe + await Task.Delay(1000).ForAwait(); + lock (syncLock) { for (int i = 0; i < count; i++) From 91339f11ffa5a415db0c05bddce99523f2d5ce4d Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 4 Jan 2022 16:21:01 -0500 Subject: [PATCH 039/117] PubSub tests: make it more specific and resilient This wasn't testing a reliable thing before...actually test 
what we're aiming for and it should also be more stable. --- tests/StackExchange.Redis.Tests/PubSub.cs | 36 ++++++++++++++++++----- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index c0efedeb4..099fe91e9 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -746,7 +746,7 @@ public async Task AzureRedisEventsAutomaticSubscribe() [Fact] public async Task SubscriptionsSurviveConnectionFailureAsync() { - using (var muxer = Create(allowAdmin: true, shared: false)) + using (var muxer = Create(allowAdmin: true, shared: false, syncTimeout: 1000)) { RedisChannel channel = Me(); var sub = muxer.GetSubscriber(); @@ -755,22 +755,44 @@ await sub.SubscribeAsync(channel, delegate { Interlocked.Increment(ref counter); }).ConfigureAwait(false); + await Task.Delay(200).ConfigureAwait(false); + await sub.PublishAsync(channel, "abc").ConfigureAwait(false); sub.Ping(); await Task.Delay(200).ConfigureAwait(false); - Assert.Equal(1, Thread.VolatileRead(ref counter)); + + var counter1 = Thread.VolatileRead(ref counter); + Log($"Expecting 1 messsage, got {counter1}"); + Assert.Equal(1, counter1); + var server = GetServer(muxer); - Assert.Equal(1, server.GetCounters().Subscription.SocketCount); + var socketCount = server.GetCounters().Subscription.SocketCount; + Log($"Expecting 1 socket, got {socketCount}"); + Assert.Equal(1, socketCount); + // We might fail both connections or just the primary in the time period + SetExpectedAmbientFailureCount(-1); + + // Make sure we fail all the way + muxer.AllowConnect = false; + // Fail all connections server.SimulateConnectionFailure(SimulatedFailureType.All); - SetExpectedAmbientFailureCount(2); + // Trigger failure + Assert.Throws(() => sub.Ping()); + Assert.False(server.IsConnected); + + // Now reconnect... 
+ muxer.AllowConnect = true; + // Wait until we're reconnected + await UntilCondition(TimeSpan.FromSeconds(5), () => server.IsConnected); + // And time to resubscribe... await Task.Delay(200).ConfigureAwait(false); sub.Ping(); - Assert.Equal(2, server.GetCounters().Subscription.SocketCount); + await sub.PublishAsync(channel, "abc").ConfigureAwait(false); - await Task.Delay(200).ConfigureAwait(false); - sub.Ping(); + // Give it a few seconds to get our messages + await UntilCondition(TimeSpan.FromSeconds(5), () => Thread.VolatileRead(ref counter) == 2); Assert.Equal(2, Thread.VolatileRead(ref counter)); } } From 301844299f8b5f2b1e97e2ce9a607f116be49bb4 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 4 Jan 2022 16:40:25 -0500 Subject: [PATCH 040/117] Add more data to this bugger... --- src/StackExchange.Redis/RedisSubscriber.cs | 8 ++++++++ tests/StackExchange.Redis.Tests/PubSub.cs | 9 +++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs index 8884dc263..d8d575286 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -13,6 +13,14 @@ public partial class ConnectionMultiplexer { private readonly Dictionary subscriptions = new Dictionary(); + internal int GetSubscriptionsCount() + { + lock (subscriptions) + { + return subscriptions.Count; + } + } + internal static void CompleteAsWorker(ICompletable completable) { if (completable != null) ThreadPool.QueueUserWorkItem(s_CompleteAsWorker, completable); diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 099fe91e9..724f85311 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -746,7 +746,7 @@ public async Task AzureRedisEventsAutomaticSubscribe() [Fact] public async Task SubscriptionsSurviveConnectionFailureAsync() { - using (var muxer = 
Create(allowAdmin: true, shared: false, syncTimeout: 1000)) + using (var muxer = Create(allowAdmin: true, shared: false, syncTimeout: 1000) as ConnectionMultiplexer) { RedisChannel channel = Me(); var sub = muxer.GetSubscriber(); @@ -755,6 +755,7 @@ await sub.SubscribeAsync(channel, delegate { Interlocked.Increment(ref counter); }).ConfigureAwait(false); + Assert.Equal(1, muxer.GetSubscriptionsCount()); await Task.Delay(200).ConfigureAwait(false); @@ -789,11 +790,15 @@ await sub.SubscribeAsync(channel, delegate // And time to resubscribe... await Task.Delay(200).ConfigureAwait(false); sub.Ping(); + Assert.Equal(1, muxer.GetSubscriptionsCount()); await sub.PublishAsync(channel, "abc").ConfigureAwait(false); // Give it a few seconds to get our messages await UntilCondition(TimeSpan.FromSeconds(5), () => Thread.VolatileRead(ref counter) == 2); - Assert.Equal(2, Thread.VolatileRead(ref counter)); + + var counter2 = Thread.VolatileRead(ref counter); + Log($"Expecting 2 messsages, got {counter2}"); + Assert.Equal(2, counter2); } } } From ade853b706ebb81ece48c78f4842a62a84d06413 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Wed, 5 Jan 2022 11:22:23 -0500 Subject: [PATCH 041/117] Subscriber: simplify This adds profiling to the test as well as fixes the `PING` going over the interactive connection as well as the `IsConnected(channel)` on `RedisSubscriber` ultimately telling us if the interactive (rather than subscription) is active - now we check both bridges. There's still a race issue with subscriptions being restored from the `Task.Run()` in `Task.Run(() => ExecuteSubscriptionLoop());` (`GetSubscriptionQueue`) that we need to resolve, probably relying on the backlog or issuing it immediately after the handshake. 
--- src/StackExchange.Redis/RedisSubscriber.cs | 49 ++++++++------------- src/StackExchange.Redis/ServerEndPoint.cs | 1 + tests/StackExchange.Redis.Tests/PubSub.cs | 51 ++++++++++++++++++++-- 3 files changed, 66 insertions(+), 35 deletions(-) diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs index d8d575286..adca3f80c 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -174,11 +174,10 @@ internal void ResendSubscriptions(ServerEndPoint server) internal bool SubscriberConnected(in RedisChannel channel = default(RedisChannel)) { - var server = GetSubscribedServer(channel); - if (server != null) return server.IsConnected; + // TODO: default(RedisKey) is incorrect here - should shard based on the channel in cluster + var server = GetSubscribedServer(channel) ?? SelectServer(RedisCommand.SUBSCRIBE, CommandFlags.DemandMaster, default(RedisKey)); - server = SelectServer(RedisCommand.SUBSCRIBE, CommandFlags.DemandMaster, default(RedisKey)); - return server?.IsConnected == true; + return server?.IsConnected == true && server.IsSubscriberConnected; } internal long ValidateSubscriptions() @@ -228,6 +227,7 @@ public bool Remove(Action handler, ChannelMessageQueue public Task SubscribeToServer(ConnectionMultiplexer multiplexer, in RedisChannel channel, CommandFlags flags, object asyncState, bool internalCall) { + // TODO: default(RedisKey) is incorrect here - should shard based on the channel in cluster var selected = multiplexer.SelectServer(RedisCommand.SUBSCRIBE, flags, default(RedisKey)); var bridge = selected?.GetBridge(ConnectionType.Subscription, true); if (bridge == null) return null; @@ -305,14 +305,13 @@ private PendingSubscriptionState(object asyncState, RedisChannel channel, Subscr internal void Resubscribe(in RedisChannel channel, ServerEndPoint server) { - if (server != null && Interlocked.CompareExchange(ref owner, server, server) == server) + // Only 
re-subscribe to the original server + if (server != null && GetOwner() == server) { var cmd = channel.IsPatternBased ? RedisCommand.PSUBSCRIBE : RedisCommand.SUBSCRIBE; var msg = Message.Create(-1, CommandFlags.FireAndForget, cmd, channel); msg.SetInternalCall(); -#pragma warning disable CS0618 - server.WriteDirectFireAndForgetSync(msg, ResultProcessor.TrackSubscriptions); -#pragma warning restore CS0618 + server.Multiplexer.ExecuteSyncImpl(msg, ResultProcessor.TrackSubscriptions, server); } } @@ -428,36 +427,24 @@ public Task IdentifyEndpointAsync(RedisChannel channel, CommandFlags f public override TimeSpan Ping(CommandFlags flags = CommandFlags.None) { - var msg = CreatePingMessage(flags, out var server); - return ExecuteSync(msg, ResultProcessor.ResponseTimer, server); + var msg = CreatePingMessage(flags); + return ExecuteSync(msg, ResultProcessor.ResponseTimer); } public override Task PingAsync(CommandFlags flags = CommandFlags.None) { - var msg = CreatePingMessage(flags, out var server); - return ExecuteAsync(msg, ResultProcessor.ResponseTimer, server); + var msg = CreatePingMessage(flags); + return ExecuteAsync(msg, ResultProcessor.ResponseTimer); } - private Message CreatePingMessage(CommandFlags flags, out ServerEndPoint server) + private Message CreatePingMessage(CommandFlags flags) { - bool usePing = false; - server = null; - if (multiplexer.CommandMap.IsAvailable(RedisCommand.PING)) - { - try { usePing = GetFeatures(default, flags, out server).PingOnSubscriber; } - catch { } - } - - if (usePing) - { - return ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.PING); - } - else - { - // can't use regular PING, but we can unsubscribe from something random that we weren't even subscribed to... 
- RedisValue channel = multiplexer.UniqueId; - return ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.UNSUBSCRIBE, channel); - } + // We're explicitly NOT using PING here because GetBridge() would send this over the interactive connection + // rather than the subscription connection we intend. + RedisValue channel = multiplexer.UniqueId; + var message = ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.UNSUBSCRIBE, channel); + message.SetInternalCall(); + return message; } public long Publish(RedisChannel channel, RedisValue message, CommandFlags flags = CommandFlags.None) diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index 72baa69f3..ab89e8fc8 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -73,6 +73,7 @@ public ServerEndPoint(ConnectionMultiplexer multiplexer, EndPoint endpoint) public bool HasDatabases => serverType == ServerType.Standalone; public bool IsConnected => interactive?.IsConnected == true; + public bool IsSubscriberConnected => subscription?.IsConnected == true; public bool IsConnecting => interactive?.IsConnecting == true; diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 724f85311..aa14cdf21 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -1,11 +1,13 @@ using System; using System.Collections.Generic; using System.Diagnostics; +using System.Linq; using System.Text; using System.Threading; using System.Threading.Channels; using System.Threading.Tasks; using StackExchange.Redis.Maintenance; +using StackExchange.Redis.Profiling; using Xunit; using Xunit.Abstractions; // ReSharper disable AccessToModifiedClosure @@ -746,8 +748,10 @@ public async Task AzureRedisEventsAutomaticSubscribe() [Fact] public async Task SubscriptionsSurviveConnectionFailureAsync() { + var session = new 
ProfilingSession(); using (var muxer = Create(allowAdmin: true, shared: false, syncTimeout: 1000) as ConnectionMultiplexer) { + muxer.RegisterProfiler(() => session); RedisChannel channel = Me(); var sub = muxer.GetSubscriber(); int counter = 0; @@ -755,6 +759,15 @@ await sub.SubscribeAsync(channel, delegate { Interlocked.Increment(ref counter); }).ConfigureAwait(false); + + var profile1 = session.FinishProfiling(); + foreach (var command in profile1) + { + Log($"{command.EndPoint}: {command}"); + } + // We shouldn't see the initial connection here + Assert.Equal(0, profile1.Count(p => p.Command == nameof(RedisCommand.SUBSCRIBE))); + Assert.Equal(1, muxer.GetSubscriptionsCount()); await Task.Delay(200).ConfigureAwait(false); @@ -777,28 +790,58 @@ await sub.SubscribeAsync(channel, delegate // Make sure we fail all the way muxer.AllowConnect = false; + Log("Failing connection"); // Fail all connections server.SimulateConnectionFailure(SimulatedFailureType.All); // Trigger failure Assert.Throws(() => sub.Ping()); - Assert.False(server.IsConnected); + Assert.False(sub.IsConnected(channel)); // Now reconnect... muxer.AllowConnect = true; + Log("Waiting on reconnect"); // Wait until we're reconnected - await UntilCondition(TimeSpan.FromSeconds(5), () => server.IsConnected); + await UntilCondition(TimeSpan.FromSeconds(10), () => sub.IsConnected(channel)); + Log("Reconnected"); + // Ensure we're reconnected + Assert.True(sub.IsConnected(channel)); + // And time to resubscribe... 
- await Task.Delay(200).ConfigureAwait(false); + await Task.Delay(1000).ConfigureAwait(false); + + // Ensure we've sent the subscribe command after reconnecting + var profile2 = session.FinishProfiling(); + foreach (var command in profile2) + { + Log($"{command.EndPoint}: {command}"); + } + //Assert.Equal(1, profile2.Count(p => p.Command == nameof(RedisCommand.SUBSCRIBE))); + + Log($"Issuing ping after reconnected"); sub.Ping(); Assert.Equal(1, muxer.GetSubscriptionsCount()); - await sub.PublishAsync(channel, "abc").ConfigureAwait(false); + Log("Publishing"); + var published = await sub.PublishAsync(channel, "abc").ConfigureAwait(false); + + Log($"Published to {published} subscriber(s)."); + Assert.Equal(1, published); + // Give it a few seconds to get our messages + Log("Waiting for 2 messages"); await UntilCondition(TimeSpan.FromSeconds(5), () => Thread.VolatileRead(ref counter) == 2); var counter2 = Thread.VolatileRead(ref counter); Log($"Expecting 2 messsages, got {counter2}"); Assert.Equal(2, counter2); + + // Log all commands at the end + Log("All commands since connecting:"); + var profile3 = session.FinishProfiling(); + foreach (var command in profile3) + { + Log($"{command.EndPoint}: {command}"); + } } } } From 1f946d4c82504fca068437d86b5961556cfdd4be Mon Sep 17 00:00:00 2001 From: mgravell Date: Thu, 6 Jan 2022 13:53:00 +0000 Subject: [PATCH 042/117] add new client flags --- src/StackExchange.Redis/ClientInfo.cs | 9 ++++++++- src/StackExchange.Redis/Enums/ClientFlags.cs | 12 ++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/StackExchange.Redis/ClientInfo.cs b/src/StackExchange.Redis/ClientInfo.cs index fdee896fa..92620bcc7 100644 --- a/src/StackExchange.Redis/ClientInfo.cs +++ b/src/StackExchange.Redis/ClientInfo.cs @@ -47,6 +47,9 @@ public sealed class ClientInfo /// S: the client is a normal replica server /// U: the client is connected via a Unix domain socket /// x: the client is in a MULTI/EXEC context + /// t: the client 
enabled keys tracking in order to perform client side caching + /// R: the client tracking target client is invalid + /// B: the client enabled broadcast tracking mode /// public string FlagsRaw { get; private set; } @@ -172,7 +175,11 @@ internal static ClientInfo[] Parse(string input) AddFlag(ref flags, value, ClientFlags.Unblocked, 'u'); AddFlag(ref flags, value, ClientFlags.UnixDomainSocket, 'U'); AddFlag(ref flags, value, ClientFlags.Transaction, 'x'); - + + AddFlag(ref flags, value, ClientFlags.KeysTracking, 't'); + AddFlag(ref flags, value, ClientFlags.TrackingTargetInvalid, 'R'); + AddFlag(ref flags, value, ClientFlags.BroadcastTracking, 'B'); + client.Flags = flags; break; case "id": client.Id = Format.ParseInt64(value); break; diff --git a/src/StackExchange.Redis/Enums/ClientFlags.cs b/src/StackExchange.Redis/Enums/ClientFlags.cs index a652f61a4..559a13799 100644 --- a/src/StackExchange.Redis/Enums/ClientFlags.cs +++ b/src/StackExchange.Redis/Enums/ClientFlags.cs @@ -85,5 +85,17 @@ public enum ClientFlags : long /// the client is connected via a Unix domain socket /// UnixDomainSocket = 2048, + /// + /// the client enabled keys tracking in order to perform client side caching + /// + KeysTracking = 4096, + /// + /// the client tracking target client is invalid + /// + TrackingTargetInvalid = 8192, + /// + /// the client enabled broadcast tracking mode + /// + BroadcastTracking = 16384, } } From a4425ecb0bbe5aef74cabb26e75ca0cdb0a7ca57 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sun, 9 Jan 2022 22:23:47 -0500 Subject: [PATCH 043/117] Changes for #1912 Reducing diff from #1912 on bits we can simplify & merge in sooner. 
--- .../ConnectToUnexistingHost.cs | 8 +-- .../ConnectingFailDetection.cs | 17 +++-- .../ExceptionFactoryTests.cs | 5 +- tests/StackExchange.Redis.Tests/TestBase.cs | 72 ++++++++++++++----- 4 files changed, 74 insertions(+), 28 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/ConnectToUnexistingHost.cs b/tests/StackExchange.Redis.Tests/ConnectToUnexistingHost.cs index e2c454a5c..03757d918 100644 --- a/tests/StackExchange.Redis.Tests/ConnectToUnexistingHost.cs +++ b/tests/StackExchange.Redis.Tests/ConnectToUnexistingHost.cs @@ -48,7 +48,7 @@ void innerScenario() { var ex = Assert.Throws(() => { - using (ConnectionMultiplexer.Connect(TestConfig.Current.MasterServer + ":6500,connectTimeout=1000", Writer)) { } + using (ConnectionMultiplexer.Connect(TestConfig.Current.MasterServer + ":6500,connectTimeout=1000,connectRetry=0", Writer)) { } }); Log(ex.ToString()); } @@ -59,7 +59,7 @@ public async Task CanNotOpenNonsenseConnection_DNS() { var ex = await Assert.ThrowsAsync(async () => { - using (await ConnectionMultiplexer.ConnectAsync($"doesnot.exist.ds.{Guid.NewGuid():N}.com:6500,connectTimeout=1000", Writer).ForAwait()) { } + using (await ConnectionMultiplexer.ConnectAsync($"doesnot.exist.ds.{Guid.NewGuid():N}.com:6500,connectTimeout=1000,connectRetry=0", Writer).ForAwait()) { } }).ForAwait(); Log(ex.ToString()); } @@ -70,7 +70,7 @@ public async Task CreateDisconnectedNonsenseConnection_IP() await RunBlockingSynchronousWithExtraThreadAsync(innerScenario).ForAwait(); void innerScenario() { - using (var conn = ConnectionMultiplexer.Connect(TestConfig.Current.MasterServer + ":6500,abortConnect=false,connectTimeout=1000", Writer)) + using (var conn = ConnectionMultiplexer.Connect(TestConfig.Current.MasterServer + ":6500,abortConnect=false,connectTimeout=1000,connectRetry=0", Writer)) { Assert.False(conn.GetServer(conn.GetEndPoints().Single()).IsConnected); Assert.False(conn.GetDatabase().IsConnected(default(RedisKey))); @@ -84,7 +84,7 @@ public async Task 
CreateDisconnectedNonsenseConnection_DNS() await RunBlockingSynchronousWithExtraThreadAsync(innerScenario).ForAwait(); void innerScenario() { - using (var conn = ConnectionMultiplexer.Connect($"doesnot.exist.ds.{Guid.NewGuid():N}.com:6500,abortConnect=false,connectTimeout=1000", Writer)) + using (var conn = ConnectionMultiplexer.Connect($"doesnot.exist.ds.{Guid.NewGuid():N}.com:6500,abortConnect=false,connectTimeout=1000,connectRetry=0", Writer)) { Assert.False(conn.GetServer(conn.GetEndPoints().Single()).IsConnected); Assert.False(conn.GetDatabase().IsConnected(default(RedisKey))); diff --git a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs index d1020e6e3..7b33df000 100644 --- a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs +++ b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs @@ -99,22 +99,31 @@ public async Task Issue922_ReconnectRaised() config.AbortOnConnectFail = true; config.KeepAlive = 10; config.SyncTimeout = 1000; + config.AsyncTimeout = 1000; config.ReconnectRetryPolicy = new ExponentialRetry(5000); config.AllowAdmin = true; int failCount = 0, restoreCount = 0; - using (var muxer = ConnectionMultiplexer.Connect(config)) + using (var muxer = ConnectionMultiplexer.Connect(config, log: Writer)) { - muxer.ConnectionFailed += delegate { Interlocked.Increment(ref failCount); }; - muxer.ConnectionRestored += delegate { Interlocked.Increment(ref restoreCount); }; + muxer.ConnectionFailed += (s, e) => + { + Interlocked.Increment(ref failCount); + Log($"Connection Failed ({e.ConnectionType},{e.FailureType}): {e.Exception}"); + }; + muxer.ConnectionRestored += (s, e) => + { + Interlocked.Increment(ref restoreCount); + Log($"Connection Failed ({e.ConnectionType},{e.FailureType}): {e.Exception}"); + }; muxer.GetDatabase(); Assert.Equal(0, Volatile.Read(ref failCount)); Assert.Equal(0, Volatile.Read(ref restoreCount)); var server = 
muxer.GetServer(TestConfig.Current.MasterServerAndPort); - server.SimulateConnectionFailure(SimulatedFailureType.All); + server.SimulateConnectionFailure(SimulatedFailureType.InteractiveInbound | SimulatedFailureType.InteractiveOutbound); await UntilCondition(TimeSpan.FromSeconds(10), () => Volatile.Read(ref failCount) + Volatile.Read(ref restoreCount) == 4); // interactive+subscriber = 2 diff --git a/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs b/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs index 25606a8c2..24889f9d5 100644 --- a/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs +++ b/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs @@ -151,6 +151,7 @@ public void NoConnectionException(bool abortOnConnect, int connCount, int comple var options = new ConfigurationOptions() { AbortOnConnectFail = abortOnConnect, + ConnectRetry = 1, ConnectTimeout = 500, SyncTimeout = 500, KeepAlive = 5000 @@ -160,12 +161,12 @@ public void NoConnectionException(bool abortOnConnect, int connCount, int comple if (abortOnConnect) { options.EndPoints.Add(TestConfig.Current.MasterServerAndPort); - muxer = ConnectionMultiplexer.Connect(options); + muxer = ConnectionMultiplexer.Connect(options, Writer); } else { options.EndPoints.Add($"doesnot.exist.{Guid.NewGuid():N}:6379"); - muxer = ConnectionMultiplexer.Connect(options); + muxer = ConnectionMultiplexer.Connect(options, Writer); } using (muxer) diff --git a/tests/StackExchange.Redis.Tests/TestBase.cs b/tests/StackExchange.Redis.Tests/TestBase.cs index 21bf84f55..d1d2ef408 100644 --- a/tests/StackExchange.Redis.Tests/TestBase.cs +++ b/tests/StackExchange.Redis.Tests/TestBase.cs @@ -119,6 +119,7 @@ static TestBase() Console.WriteLine(" GC LOH Mode: " + GCSettings.LargeObjectHeapCompactionMode); Console.WriteLine(" GC Latency Mode: " + GCSettings.LatencyMode); } + internal static string Time() => DateTime.UtcNow.ToString("HH:mm:ss.ffff"); protected void OnConnectionFailed(object sender, 
ConnectionFailedEventArgs e) { @@ -223,13 +224,25 @@ protected IServer GetAnyMaster(IConnectionMultiplexer muxer) } internal virtual IInternalConnectionMultiplexer Create( - string clientName = null, int? syncTimeout = null, bool? allowAdmin = null, int? keepAlive = null, - int? connectTimeout = null, string password = null, string tieBreaker = null, TextWriter log = null, - bool fail = true, string[] disabledCommands = null, string[] enabledCommands = null, - bool checkConnect = true, string failMessage = null, - string channelPrefix = null, Proxy? proxy = null, - string configuration = null, bool logTransactionData = true, - bool shared = true, int? defaultDatabase = null, + string clientName = null, + int? syncTimeout = null, + bool? allowAdmin = null, + int? keepAlive = null, + int? connectTimeout = null, + string password = null, + string tieBreaker = null, + TextWriter log = null, + bool fail = true, + string[] disabledCommands = null, + string[] enabledCommands = null, + bool checkConnect = true, + string failMessage = null, + string channelPrefix = null, + Proxy? proxy = null, + string configuration = null, + bool logTransactionData = true, + bool shared = true, + int? 
defaultDatabase = null, [CallerMemberName] string caller = null) { if (Output == null) @@ -237,8 +250,20 @@ internal virtual IInternalConnectionMultiplexer Create( Assert.True(false, "Failure: Be sure to call the TestBase constuctor like this: BasicOpsTests(ITestOutputHelper output) : base(output) { }"); } - if (shared && _fixture != null && _fixture.IsEnabled && enabledCommands == null && disabledCommands == null && fail && channelPrefix == null && proxy == null - && configuration == null && password == null && tieBreaker == null && defaultDatabase == null && (allowAdmin == null || allowAdmin == true) && expectedFailCount == 0) + // Share a connection if instructed to and we can - many specifics mean no sharing + if (shared + && _fixture != null && _fixture.IsEnabled + && enabledCommands == null + && disabledCommands == null + && fail + && channelPrefix == null + && proxy == null + && configuration == null + && password == null + && tieBreaker == null + && defaultDatabase == null + && (allowAdmin == null || allowAdmin == true) + && expectedFailCount == 0) { configuration = GetConfiguration(); if (configuration == _fixture.Configuration) @@ -255,7 +280,8 @@ internal virtual IInternalConnectionMultiplexer Create( checkConnect, failMessage, channelPrefix, proxy, configuration ?? GetConfiguration(), - logTransactionData, defaultDatabase, caller); + logTransactionData, defaultDatabase, + caller); muxer.InternalError += OnInternalError; muxer.ConnectionFailed += OnConnectionFailed; return muxer; @@ -263,18 +289,28 @@ internal virtual IInternalConnectionMultiplexer Create( public static ConnectionMultiplexer CreateDefault( TextWriter output, - string clientName = null, int? syncTimeout = null, bool? allowAdmin = null, int? keepAlive = null, - int? 
connectTimeout = null, string password = null, string tieBreaker = null, TextWriter log = null, - bool fail = true, string[] disabledCommands = null, string[] enabledCommands = null, - bool checkConnect = true, string failMessage = null, - string channelPrefix = null, Proxy? proxy = null, - string configuration = null, bool logTransactionData = true, + string clientName = null, + int? syncTimeout = null, + bool? allowAdmin = null, + int? keepAlive = null, + int? connectTimeout = null, + string password = null, + string tieBreaker = null, + TextWriter log = null, + bool fail = true, + string[] disabledCommands = null, + string[] enabledCommands = null, + bool checkConnect = true, + string failMessage = null, + string channelPrefix = null, + Proxy? proxy = null, + string configuration = null, + bool logTransactionData = true, int? defaultDatabase = null, - [CallerMemberName] string caller = null) { StringWriter localLog = null; - if(log == null) + if (log == null) { log = localLog = new StringWriter(); } From b0fb2a136db05359d0202ad700195b083133dc3f Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sun, 9 Jan 2022 22:31:29 -0500 Subject: [PATCH 044/117] Revert 1 mismatch --- tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs b/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs index 24889f9d5..63c6ff2dc 100644 --- a/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs +++ b/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs @@ -151,7 +151,6 @@ public void NoConnectionException(bool abortOnConnect, int connCount, int comple var options = new ConfigurationOptions() { AbortOnConnectFail = abortOnConnect, - ConnectRetry = 1, ConnectTimeout = 500, SyncTimeout = 500, KeepAlive = 5000 From 990f5e867c33eb5221ff0a5ea2e6a9422512232a Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 10 Jan 2022 08:59:46 -0500 Subject: [PATCH 045/117] WIP: Pub/Sub 
portion of #1912 We're working on pub/sub - breaking it out explicitly. --- src/StackExchange.Redis/RedisSubscriber.cs | 57 ++++++------- src/StackExchange.Redis/ServerEndPoint.cs | 16 ++-- tests/StackExchange.Redis.Tests/PubSub.cs | 93 +++++++++++++++++++--- 3 files changed, 120 insertions(+), 46 deletions(-) diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs index 22b9664b7..3acf60fde 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -13,6 +13,14 @@ public partial class ConnectionMultiplexer { private readonly Dictionary subscriptions = new Dictionary(); + internal int GetSubscriptionsCount() + { + lock (subscriptions) + { + return subscriptions.Count; + } + } + internal static void CompleteAsWorker(ICompletable completable) { if (completable != null) ThreadPool.QueueUserWorkItem(s_CompleteAsWorker, completable); @@ -166,11 +174,10 @@ internal void ResendSubscriptions(ServerEndPoint server) internal bool SubscriberConnected(in RedisChannel channel = default(RedisChannel)) { - var server = GetSubscribedServer(channel); - if (server != null) return server.IsConnected; + // TODO: default(RedisKey) is incorrect here - should shard based on the channel in cluster + var server = GetSubscribedServer(channel) ?? 
SelectServer(RedisCommand.SUBSCRIBE, CommandFlags.DemandMaster, default(RedisKey)); - server = SelectServer(RedisCommand.SUBSCRIBE, CommandFlags.DemandMaster, default(RedisKey)); - return server?.IsConnected == true; + return server?.IsConnected == true && server.IsSubscriberConnected; } internal long ValidateSubscriptions() @@ -221,6 +228,7 @@ public bool Remove(Action handler, ChannelMessageQueue [System.Diagnostics.CodeAnalysis.SuppressMessage("Usage", "RCS1210:Return completed task instead of returning null.", Justification = "Intentional for efficient success check")] public Task SubscribeToServer(ConnectionMultiplexer multiplexer, in RedisChannel channel, CommandFlags flags, object asyncState, bool internalCall) { + // TODO: default(RedisKey) is incorrect here - should shard based on the channel in cluster var selected = multiplexer.SelectServer(RedisCommand.SUBSCRIBE, flags, default(RedisKey)); var bridge = selected?.GetBridge(ConnectionType.Subscription, true); if (bridge == null) return null; @@ -299,14 +307,13 @@ private PendingSubscriptionState(object asyncState, RedisChannel channel, Subscr internal void Resubscribe(in RedisChannel channel, ServerEndPoint server) { - if (server != null && Interlocked.CompareExchange(ref owner, server, server) == server) + // Only re-subscribe to the original server + if (server != null && GetOwner() == server) { var cmd = channel.IsPatternBased ? 
RedisCommand.PSUBSCRIBE : RedisCommand.SUBSCRIBE; var msg = Message.Create(-1, CommandFlags.FireAndForget, cmd, channel); msg.SetInternalCall(); -#pragma warning disable CS0618 - server.WriteDirectFireAndForgetSync(msg, ResultProcessor.TrackSubscriptions); -#pragma warning restore CS0618 + server.Multiplexer.ExecuteSyncImpl(msg, ResultProcessor.TrackSubscriptions, server); } } @@ -422,36 +429,24 @@ public Task IdentifyEndpointAsync(RedisChannel channel, CommandFlags f public override TimeSpan Ping(CommandFlags flags = CommandFlags.None) { - var msg = CreatePingMessage(flags, out var server); - return ExecuteSync(msg, ResultProcessor.ResponseTimer, server); + var msg = CreatePingMessage(flags); + return ExecuteSync(msg, ResultProcessor.ResponseTimer); } public override Task PingAsync(CommandFlags flags = CommandFlags.None) { - var msg = CreatePingMessage(flags, out var server); - return ExecuteAsync(msg, ResultProcessor.ResponseTimer, server); + var msg = CreatePingMessage(flags); + return ExecuteAsync(msg, ResultProcessor.ResponseTimer); } - private Message CreatePingMessage(CommandFlags flags, out ServerEndPoint server) + private Message CreatePingMessage(CommandFlags flags) { - bool usePing = false; - server = null; - if (multiplexer.CommandMap.IsAvailable(RedisCommand.PING)) - { - try { usePing = GetFeatures(default, flags, out server).PingOnSubscriber; } - catch { } - } - - if (usePing) - { - return ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.PING); - } - else - { - // can't use regular PING, but we can unsubscribe from something random that we weren't even subscribed to... - RedisValue channel = multiplexer.UniqueId; - return ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.UNSUBSCRIBE, channel); - } + // We're explicitly NOT using PING here because GetBridge() would send this over the interactive connection + // rather than the subscription connection we intend. 
+ RedisValue channel = multiplexer.UniqueId; + var message = ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.UNSUBSCRIBE, channel); + message.SetInternalCall(); + return message; } public long Publish(RedisChannel channel, RedisValue message, CommandFlags flags = CommandFlags.None) diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index f41e360a3..b24d43065 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -74,6 +74,8 @@ public ServerEndPoint(ConnectionMultiplexer multiplexer, EndPoint endpoint) public bool IsConnected => interactive?.IsConnected == true; + public bool IsSubscriberConnected => subscription?.IsConnected == true; + public bool IsConnecting => interactive?.IsConnecting == true; private readonly List> _pendingConnectionMonitors = new List>(); @@ -92,7 +94,7 @@ async Task IfConnectedAsync(LogProxy log, bool sendTracerIfConnected, bo } if (sendTracerIfConnected) { - await SendTracer(log).ForAwait(); + await SendTracerAsync(log).ForAwait(); } log?.WriteLine($"{Format.ToString(this)}: OnConnectedAsync already connected end"); return "Already connected"; @@ -686,7 +688,7 @@ internal void OnHeartbeat() } } - internal Task WriteDirectAsync(Message message, ResultProcessor processor, object asyncState = null, PhysicalBridge bridge = null) + internal Task WriteDirectAsync(Message message, ResultProcessor processor, PhysicalBridge bridge = null) { static async Task Awaited(ServerEndPoint @this, Message message, ValueTask write, TaskCompletionSource tcs) { @@ -699,7 +701,7 @@ static async Task Awaited(ServerEndPoint @this, Message message, ValueTask.Create(out var tcs, asyncState); + var source = TaskResultBox.Create(out var tcs, null); message.SetSource(processor, source); if (bridge == null) bridge = GetBridge(message.Command); @@ -743,7 +745,7 @@ internal void ReportNextFailure() subscription?.ReportNextFailure(); } - internal Task 
SendTracer(LogProxy log = null) + internal Task SendTracerAsync(LogProxy log = null) { var msg = GetTracerMessage(false); msg = LoggingMessage.Create(log, msg); @@ -783,6 +785,9 @@ internal string Summary() return sb.ToString(); } + /// + /// Write the message directly to the pipe or fail...will not queue. + /// internal ValueTask WriteDirectOrQueueFireAndForgetAsync(PhysicalConnection connection, Message message, ResultProcessor processor) { static async ValueTask Awaited(ValueTask l_result) => await l_result.ForAwait(); @@ -886,7 +891,7 @@ private async Task HandshakeAsync(PhysicalConnection connection, LogProxy log) log?.WriteLine($"{Format.ToString(this)}: Sending critical tracer (handshake): {tracer.CommandAndKey}"); await WriteDirectOrQueueFireAndForgetAsync(connection, tracer, ResultProcessor.EstablishConnection).ForAwait(); - // note: this **must** be the last thing on the subscription handshake, because after this + // Note: this **must** be the last thing on the subscription handshake, because after this // we will be in subscriber mode: regular commands cannot be sent if (connType == ConnectionType.Subscription) { @@ -894,6 +899,7 @@ private async Task HandshakeAsync(PhysicalConnection connection, LogProxy log) if (configChannel != null) { msg = Message.Create(-1, CommandFlags.FireAndForget, RedisCommand.SUBSCRIBE, (RedisChannel)configChannel); + // Note: this is NOT internal, we want it to queue in a backlog for sending when ready if necessary await WriteDirectOrQueueFireAndForgetAsync(connection, msg, ResultProcessor.TrackSubscriptions).ForAwait(); } } diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 0e4131913..9c3d264da 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -1,11 +1,13 @@ using System; using System.Collections.Generic; using System.Diagnostics; +using System.Linq; using System.Text; using System.Threading; using 
System.Threading.Channels; using System.Threading.Tasks; using StackExchange.Redis.Maintenance; +using StackExchange.Redis.Profiling; using Xunit; using Xunit.Abstractions; // ReSharper disable AccessToModifiedClosure @@ -520,6 +522,9 @@ public async Task PubSubGetAllCorrectOrder_OnMessage_Async() }); await sub.PingAsync().ForAwait(); + // Give a delay between subscriptions and when we try to publish to be safe + await Task.Delay(1000).ForAwait(); + lock (syncLock) { for (int i = 0; i < count; i++) @@ -743,8 +748,10 @@ public async Task AzureRedisEventsAutomaticSubscribe() [Fact] public async Task SubscriptionsSurviveConnectionFailureAsync() { - using (var muxer = Create(allowAdmin: true, shared: false)) + var session = new ProfilingSession(); + using (var muxer = Create(allowAdmin: true, shared: false, syncTimeout: 1000) as ConnectionMultiplexer) { + muxer.RegisterProfiler(() => session); RedisChannel channel = Me(); var sub = muxer.GetSubscriber(); int counter = 0; @@ -752,23 +759,89 @@ await sub.SubscribeAsync(channel, delegate { Interlocked.Increment(ref counter); }).ConfigureAwait(false); + + var profile1 = session.FinishProfiling(); + foreach (var command in profile1) + { + Log($"{command.EndPoint}: {command}"); + } + // We shouldn't see the initial connection here + Assert.Equal(0, profile1.Count(p => p.Command == nameof(RedisCommand.SUBSCRIBE))); + + Assert.Equal(1, muxer.GetSubscriptionsCount()); + await Task.Delay(200).ConfigureAwait(false); + await sub.PublishAsync(channel, "abc").ConfigureAwait(false); sub.Ping(); await Task.Delay(200).ConfigureAwait(false); - Assert.Equal(1, Thread.VolatileRead(ref counter)); + + var counter1 = Thread.VolatileRead(ref counter); + Log($"Expecting 1 messsage, got {counter1}"); + Assert.Equal(1, counter1); + var server = GetServer(muxer); - Assert.Equal(1, server.GetCounters().Subscription.SocketCount); + var socketCount = server.GetCounters().Subscription.SocketCount; + Log($"Expecting 1 socket, got {socketCount}"); + 
Assert.Equal(1, socketCount); + // We might fail both connections or just the primary in the time period + SetExpectedAmbientFailureCount(-1); + + // Make sure we fail all the way + muxer.AllowConnect = false; + Log("Failing connection"); + // Fail all connections server.SimulateConnectionFailure(SimulatedFailureType.All); - SetExpectedAmbientFailureCount(2); - await Task.Delay(200).ConfigureAwait(false); - sub.Ping(); - Assert.Equal(2, server.GetCounters().Subscription.SocketCount); - await sub.PublishAsync(channel, "abc").ConfigureAwait(false); - await Task.Delay(200).ConfigureAwait(false); + // Trigger failure + Assert.Throws(() => sub.Ping()); + Assert.False(sub.IsConnected(channel)); + + // Now reconnect... + muxer.AllowConnect = true; + Log("Waiting on reconnect"); + // Wait until we're reconnected + await UntilCondition(TimeSpan.FromSeconds(10), () => sub.IsConnected(channel)); + Log("Reconnected"); + // Ensure we're reconnected + Assert.True(sub.IsConnected(channel)); + + // And time to resubscribe... 
+ await Task.Delay(1000).ConfigureAwait(false); + + // Ensure we've sent the subscribe command after reconnecting + var profile2 = session.FinishProfiling(); + foreach (var command in profile2) + { + Log($"{command.EndPoint}: {command}"); + } + //Assert.Equal(1, profile2.Count(p => p.Command == nameof(RedisCommand.SUBSCRIBE))); + + Log($"Issuing ping after reconnected"); sub.Ping(); - Assert.Equal(2, Thread.VolatileRead(ref counter)); + Assert.Equal(1, muxer.GetSubscriptionsCount()); + + Log("Publishing"); + var published = await sub.PublishAsync(channel, "abc").ConfigureAwait(false); + + Log($"Published to {published} subscriber(s)."); + Assert.Equal(1, published); + + // Give it a few seconds to get our messages + Log("Waiting for 2 messages"); + await UntilCondition(TimeSpan.FromSeconds(5), () => Thread.VolatileRead(ref counter) == 2); + + var counter2 = Thread.VolatileRead(ref counter); + Log($"Expecting 2 messsages, got {counter2}"); + Assert.Equal(2, counter2); + + // Log all commands at the end + Log("All commands since connecting:"); + var profile3 = session.FinishProfiling(); + foreach (var command in profile3) + { + Log($"{command.EndPoint}: {command}"); + } } } } From d552097e39465e6776293d7958c5116bc96f6b1c Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 10 Jan 2022 10:45:05 -0500 Subject: [PATCH 046/117] Lots of things - need to writeup in PR --- .../ConfigurationOptions.cs | 2 +- .../ConnectionMultiplexer.cs | 2 +- src/StackExchange.Redis/Enums/CommandFlags.cs | 2 + src/StackExchange.Redis/ExceptionFactory.cs | 4 +- src/StackExchange.Redis/Message.cs | 12 +++++- src/StackExchange.Redis/RedisBatch.cs | 2 +- src/StackExchange.Redis/RedisSubscriber.cs | 27 ++++++++++--- src/StackExchange.Redis/ServerEndPoint.cs | 40 ++++++++++++++----- .../ConnectingFailDetection.cs | 2 +- tests/StackExchange.Redis.Tests/PubSub.cs | 2 +- tests/StackExchange.Redis.Tests/TestBase.cs | 5 +++ 11 files changed, 77 insertions(+), 23 deletions(-) diff --git 
a/src/StackExchange.Redis/ConfigurationOptions.cs b/src/StackExchange.Redis/ConfigurationOptions.cs index abc12b579..ddc53f1a8 100644 --- a/src/StackExchange.Redis/ConfigurationOptions.cs +++ b/src/StackExchange.Redis/ConfigurationOptions.cs @@ -280,7 +280,7 @@ public int ConnectTimeout /// /// The server version to assume /// - public Version DefaultVersion { get { return defaultVersion ?? (IsAzureEndpoint() ? RedisFeatures.v4_0_0 : RedisFeatures.v2_8_0); } set { defaultVersion = value; } } + public Version DefaultVersion { get { return defaultVersion ?? (IsAzureEndpoint() ? RedisFeatures.v4_0_0 : RedisFeatures.v3_0_0); } set { defaultVersion = value; } } /// /// The endpoints defined for this configuration diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index 055ea3d12..34591b465 100644 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -1773,7 +1773,7 @@ internal async Task ReconfigureAsync(bool first, bool reconfigureAll, LogP { var server = servers[i]; var task = available[i]; - var bs = server.GetBridgeStatus(RedisCommand.PING); + var bs = server.GetBridgeStatus(ConnectionType.Interactive); log?.WriteLine($" Server[{i}] ({Format.ToString(server)}) Status: {task.Status} (inst: {bs.MessagesSinceLastHeartbeat}, qs: {bs.Connection.MessagesSentAwaitingResponse}, in: {bs.Connection.BytesAvailableOnSocket}, qu: {bs.MessagesSinceLastHeartbeat}, aw: {bs.IsWriterActive}, in-pipe: {bs.Connection.BytesInReadPipe}, out-pipe: {bs.Connection.BytesInWritePipe}, bw: {bs.BacklogStatus}, rs: {bs.Connection.ReadStatus}. 
ws: {bs.Connection.WriteStatus})"); } diff --git a/src/StackExchange.Redis/Enums/CommandFlags.cs b/src/StackExchange.Redis/Enums/CommandFlags.cs index f0a670d76..c1efc65c1 100644 --- a/src/StackExchange.Redis/Enums/CommandFlags.cs +++ b/src/StackExchange.Redis/Enums/CommandFlags.cs @@ -82,5 +82,7 @@ public enum CommandFlags NoScriptCache = 512, // 1024: used for timed-out; never user-specified, so not visible on the public API + + // 2048: Use subscription connection type; never user-specified, so not visible on the public API } } diff --git a/src/StackExchange.Redis/ExceptionFactory.cs b/src/StackExchange.Redis/ExceptionFactory.cs index 4cc274d24..fe7aabc3c 100644 --- a/src/StackExchange.Redis/ExceptionFactory.cs +++ b/src/StackExchange.Redis/ExceptionFactory.cs @@ -312,7 +312,7 @@ ServerEndPoint server // Add server data, if we have it if (server != null && message != null) { - var bs = server.GetBridgeStatus(message.Command); + var bs = server.GetBridgeStatus(message.IsForSubscriptionBridge ? 
ConnectionType.Subscription: ConnectionType.Interactive); switch (bs.Connection.ReadStatus) { @@ -338,7 +338,7 @@ ServerEndPoint server if (multiplexer.StormLogThreshold >= 0 && bs.Connection.MessagesSentAwaitingResponse >= multiplexer.StormLogThreshold && Interlocked.CompareExchange(ref multiplexer.haveStormLog, 1, 0) == 0) { - var log = server.GetStormLog(message.Command); + var log = server.GetStormLog(message); if (string.IsNullOrWhiteSpace(log)) Interlocked.Exchange(ref multiplexer.haveStormLog, 0); else Interlocked.Exchange(ref multiplexer.stormLogSnapshot, log); } diff --git a/src/StackExchange.Redis/Message.cs b/src/StackExchange.Redis/Message.cs index c8fdf54f8..05c1f56fb 100644 --- a/src/StackExchange.Redis/Message.cs +++ b/src/StackExchange.Redis/Message.cs @@ -74,7 +74,8 @@ internal void SetBacklogState(int position, PhysicalConnection physical) private const CommandFlags AskingFlag = (CommandFlags)32, ScriptUnavailableFlag = (CommandFlags)256, - NeedsAsyncTimeoutCheckFlag = (CommandFlags)1024; + NeedsAsyncTimeoutCheckFlag = (CommandFlags)1024, + DemandSubscriptionConnection = (CommandFlags)2048; private const CommandFlags MaskMasterServerPreference = CommandFlags.DemandMaster | CommandFlags.DemandReplica @@ -705,6 +706,15 @@ internal void SetWriteTime() private int _writeTickCount; public int GetWriteTime() => Volatile.Read(ref _writeTickCount); + /// + /// Gets if this command should be sent over the subscription bridge. + /// + internal bool IsForSubscriptionBridge => (Flags & DemandSubscriptionConnection) != 0; + /// + /// Sends this command to the subscription connection rather than the interactive. 
+ /// + internal void SetForSubscriptionBridge() => Flags |= DemandSubscriptionConnection; + private void SetNeedsTimeoutCheck() => Flags |= NeedsAsyncTimeoutCheckFlag; internal bool HasAsyncTimedOut(int now, int timeoutMilliseconds, out int millisecondsTaken) { diff --git a/src/StackExchange.Redis/RedisBatch.cs b/src/StackExchange.Redis/RedisBatch.cs index 6f4d70700..7abe234c5 100644 --- a/src/StackExchange.Redis/RedisBatch.cs +++ b/src/StackExchange.Redis/RedisBatch.cs @@ -30,7 +30,7 @@ public void Execute() FailNoServer(snapshot); throw ExceptionFactory.NoConnectionAvailable(multiplexer, message, server); } - var bridge = server.GetBridge(message.Command); + var bridge = server.GetBridge(message); if (bridge == null) { FailNoServer(snapshot); diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs index 3acf60fde..850a10d3b 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -441,12 +441,27 @@ public override Task PingAsync(CommandFlags flags = CommandFlags.None) private Message CreatePingMessage(CommandFlags flags) { - // We're explicitly NOT using PING here because GetBridge() would send this over the interactive connection - // rather than the subscription connection we intend. - RedisValue channel = multiplexer.UniqueId; - var message = ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.UNSUBSCRIBE, channel); - message.SetInternalCall(); - return message; + bool usePing = false; + if (multiplexer.CommandMap.IsAvailable(RedisCommand.PING)) + { + try { usePing = GetFeatures(default, flags, out _).PingOnSubscriber; } + catch { } + } + + Message msg; + if (usePing) + { + msg = ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.PING); + } + else + { + // can't use regular PING, but we can unsubscribe from something random that we weren't even subscribed to... 
+ RedisValue channel = multiplexer.UniqueId; + msg = ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.UNSUBSCRIBE, channel); + } + // Ensure the ping is sent over the intended subscriver connection, which wouldn't happen in GetBridge() by default with PING; + msg.SetForSubscriptionBridge(); + return msg; } public long Publish(RedisChannel channel, RedisValue message, CommandFlags flags = CommandFlags.None) diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index b24d43065..ab2889e65 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -211,6 +211,28 @@ public PhysicalBridge GetBridge(ConnectionType type, bool create = true, LogProx }; } + public PhysicalBridge GetBridge(Message message, bool create = true) + { + if (isDisposed) return null; + + // Subscription commands go to a specific bridge - so we need to set that up. + // There are other commands we need to send to the right connection (e.g. subscriber PING with an explicit SetForSubscriptionBridge call), + // but these always go subscriber. + switch (message.Command) + { + case RedisCommand.SUBSCRIBE: + case RedisCommand.UNSUBSCRIBE: + case RedisCommand.PSUBSCRIBE: + case RedisCommand.PUNSUBSCRIBE: + message.SetForSubscriptionBridge(); + break; + } + + return message.IsForSubscriptionBridge + ? subscription ?? (create ? subscription = CreateBridge(ConnectionType.Subscription, null) : null) + : interactive ?? (create ? interactive = CreateBridge(ConnectionType.Interactive, null) : null); + } + public PhysicalBridge GetBridge(RedisCommand command, bool create = true) { if (isDisposed) return null; @@ -283,9 +305,9 @@ public void SetUnselectable(UnselectableFlags flags) public override string ToString() => Format.ToString(EndPoint); [Obsolete("prefer async")] - public WriteResult TryWriteSync(Message message) => GetBridge(message.Command)?.TryWriteSync(message, isReplica) ?? 
WriteResult.NoConnectionAvailable; + public WriteResult TryWriteSync(Message message) => GetBridge(message)?.TryWriteSync(message, isReplica) ?? WriteResult.NoConnectionAvailable; - public ValueTask TryWriteAsync(Message message) => GetBridge(message.Command)?.TryWriteAsync(message, isReplica) ?? new ValueTask(WriteResult.NoConnectionAvailable); + public ValueTask TryWriteAsync(Message message) => GetBridge(message)?.TryWriteAsync(message, isReplica) ?? new ValueTask(WriteResult.NoConnectionAvailable); internal void Activate(ConnectionType type, LogProxy log) { @@ -447,11 +469,11 @@ internal ServerCounters GetCounters() return counters; } - internal BridgeStatus GetBridgeStatus(RedisCommand command) + internal BridgeStatus GetBridgeStatus(ConnectionType connectionType) { try { - return GetBridge(command, false)?.GetStatus() ?? BridgeStatus.Zero; + return GetBridge(connectionType, false)?.GetStatus() ?? BridgeStatus.Zero; } catch (Exception ex) { // only needs to be best efforts @@ -486,9 +508,9 @@ internal byte[] GetScriptHash(string script, RedisCommand command) return found; } - internal string GetStormLog(RedisCommand command) + internal string GetStormLog(Message message) { - var bridge = GetBridge(command); + var bridge = GetBridge(message); return bridge?.GetStormLog(); } @@ -703,7 +725,7 @@ static async Task Awaited(ServerEndPoint @this, Message message, ValueTask.Create(out var tcs, null); message.SetSource(processor, source); - if (bridge == null) bridge = GetBridge(message.Command); + if (bridge == null) bridge = GetBridge(message); WriteResult result; if (bridge == null) @@ -735,7 +757,7 @@ internal void WriteDirectFireAndForgetSync(Message message, ResultProcessor(PhysicalConnection co if (connection == null) { Multiplexer.Trace($"{Format.ToString(this)}: Enqueue (async): " + message); - result = GetBridge(message.Command).TryWriteAsync(message, isReplica); + result = GetBridge(message).TryWriteAsync(message, isReplica); } else { diff --git 
a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs index 7b33df000..926948150 100644 --- a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs +++ b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs @@ -115,7 +115,7 @@ public async Task Issue922_ReconnectRaised() muxer.ConnectionRestored += (s, e) => { Interlocked.Increment(ref restoreCount); - Log($"Connection Failed ({e.ConnectionType},{e.FailureType}): {e.Exception}"); + Log($"Connection Restored ({e.ConnectionType},{e.FailureType}): {e.Exception}"); }; muxer.GetDatabase(); diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 9c3d264da..27976e811 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -794,7 +794,7 @@ await sub.SubscribeAsync(channel, delegate // Fail all connections server.SimulateConnectionFailure(SimulatedFailureType.All); // Trigger failure - Assert.Throws(() => sub.Ping()); + Assert.Throws(() => sub.Ping()); Assert.False(sub.IsConnected(channel)); // Now reconnect... 
diff --git a/tests/StackExchange.Redis.Tests/TestBase.cs b/tests/StackExchange.Redis.Tests/TestBase.cs index d1d2ef408..449f001ea 100644 --- a/tests/StackExchange.Redis.Tests/TestBase.cs +++ b/tests/StackExchange.Redis.Tests/TestBase.cs @@ -128,6 +128,7 @@ protected void OnConnectionFailed(object sender, ConnectionFailedEventArgs e) { privateExceptions.Add($"{Time()}: Connection failed ({e.FailureType}): {EndPointCollection.ToString(e.EndPoint)}/{e.ConnectionType}: {e.Exception}"); } + Log($"Connection Failed ({e.ConnectionType},{e.FailureType}): {e.Exception}"); } protected void OnInternalError(object sender, InternalErrorEventArgs e) @@ -284,6 +285,10 @@ internal virtual IInternalConnectionMultiplexer Create( caller); muxer.InternalError += OnInternalError; muxer.ConnectionFailed += OnConnectionFailed; + muxer.ConnectionRestored += (s, e) => + { + Log($"Connection Restored ({e.ConnectionType},{e.FailureType}): {e.Exception}"); + }; return muxer; } From fac5a1b2bfd39a69a89e7bc88e65975e25cda66e Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 10 Jan 2022 10:47:56 -0500 Subject: [PATCH 047/117] Fix KeepAlive on PhysicalBridge --- src/StackExchange.Redis/PhysicalBridge.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index f14cf7b10..b9567ab0a 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -356,6 +356,7 @@ internal void KeepAlive() if (commandMap.IsAvailable(RedisCommand.PING) && features.PingOnSubscriber) { msg = Message.Create(-1, CommandFlags.FireAndForget, RedisCommand.PING); + msg.SetForSubscriptionBridge(); msg.SetSource(ResultProcessor.Tracer, null); } else if (commandMap.IsAvailable(RedisCommand.UNSUBSCRIBE)) From 1cb00ffd593e9a4ab070651cc6d1424628e87bc0 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 10 Jan 2022 10:50:51 -0500 Subject: [PATCH 048/117] Fix default version tests --- 
tests/StackExchange.Redis.Tests/Config.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/Config.cs b/tests/StackExchange.Redis.Tests/Config.cs index a2d0d7034..2a5cf6625 100644 --- a/tests/StackExchange.Redis.Tests/Config.cs +++ b/tests/StackExchange.Redis.Tests/Config.cs @@ -14,7 +14,7 @@ namespace StackExchange.Redis.Tests { public class Config : TestBase { - public Version DefaultVersion = new (2, 8, 0); + public Version DefaultVersion = new (3, 0, 0); public Version DefaultAzureVersion = new (4, 0, 0); public Config(ITestOutputHelper output) : base(output) { } From 7fdb45af68edac792b31c6fa5f66cb46a40d499a Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 10 Jan 2022 20:59:24 -0500 Subject: [PATCH 049/117] Fix up Issue922 test now that we ping the right things *Now* this should be stable killing and restoring both connections with proper PING routing in place. --- tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs index 926948150..e1d388012 100644 --- a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs +++ b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs @@ -123,7 +123,7 @@ public async Task Issue922_ReconnectRaised() Assert.Equal(0, Volatile.Read(ref restoreCount)); var server = muxer.GetServer(TestConfig.Current.MasterServerAndPort); - server.SimulateConnectionFailure(SimulatedFailureType.InteractiveInbound | SimulatedFailureType.InteractiveOutbound); + server.SimulateConnectionFailure(SimulatedFailureType.All); await UntilCondition(TimeSpan.FromSeconds(10), () => Volatile.Read(ref failCount) + Volatile.Read(ref restoreCount) == 4); // interactive+subscriber = 2 From 85c5a4dcbf6def6f4c1ea95e93566ae32e0a8d49 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 10 Jan 2022 21:17:19 -0500 Subject: 
[PATCH 050/117] Migrate PubSub tests off sync threads --- tests/StackExchange.Redis.Tests/PubSub.cs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 27976e811..224c4bef8 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -157,6 +157,7 @@ public async Task TestBasicPubSubFireAndForget() var count = sub.Publish(key, "def", CommandFlags.FireAndForget); await PingAsync(muxer, pub, sub).ForAwait(); + await UntilCondition(TimeSpan.FromSeconds(5), () => received.Count == 1); lock (received) { Assert.Single(received); @@ -184,9 +185,7 @@ private static async Task PingAsync(IConnectionMultiplexer muxer, IServer pub, I // way to prove that is to use TPL objects var t1 = sub.PingAsync(); var t2 = pub.PingAsync(); - await Task.Delay(100).ForAwait(); // especially useful when testing any-order mode - - if (!Task.WaitAll(new[] { t1, t2 }, muxer.TimeoutMilliseconds * 2)) throw new TimeoutException(); + await Task.WhenAll(t1, t2).ForAwait(); } } From 98701c9d3fd2f9f797fac4a89550bec416724bf3 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 10 Jan 2022 21:33:45 -0500 Subject: [PATCH 051/117] Fix shared connections with simulated failures (cross-test noise) --- tests/StackExchange.Redis.Tests/AsyncTests.cs | 2 +- tests/StackExchange.Redis.Tests/ConnectFailTimeout.cs | 2 +- tests/StackExchange.Redis.Tests/ConnectionShutdown.cs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/AsyncTests.cs b/tests/StackExchange.Redis.Tests/AsyncTests.cs index 5ee26f815..4dd36670b 100644 --- a/tests/StackExchange.Redis.Tests/AsyncTests.cs +++ b/tests/StackExchange.Redis.Tests/AsyncTests.cs @@ -19,7 +19,7 @@ public void AsyncTasksReportFailureIfServerUnavailable() { SetExpectedAmbientFailureCount(-1); // this will get messy - using (var conn = Create(allowAdmin: true)) + using (var 
conn = Create(allowAdmin: true, shared: false)) { var server = conn.GetServer(TestConfig.Current.MasterServer, TestConfig.Current.MasterPort); diff --git a/tests/StackExchange.Redis.Tests/ConnectFailTimeout.cs b/tests/StackExchange.Redis.Tests/ConnectFailTimeout.cs index c52082d12..73af84fa4 100644 --- a/tests/StackExchange.Redis.Tests/ConnectFailTimeout.cs +++ b/tests/StackExchange.Redis.Tests/ConnectFailTimeout.cs @@ -13,7 +13,7 @@ public ConnectFailTimeout(ITestOutputHelper output) : base (output) { } public async Task NoticesConnectFail() { SetExpectedAmbientFailureCount(-1); - using (var conn = Create(allowAdmin: true)) + using (var conn = Create(allowAdmin: true, shared: false)) { var server = conn.GetServer(conn.GetEndPoints()[0]); diff --git a/tests/StackExchange.Redis.Tests/ConnectionShutdown.cs b/tests/StackExchange.Redis.Tests/ConnectionShutdown.cs index a4e720772..d75054ca4 100644 --- a/tests/StackExchange.Redis.Tests/ConnectionShutdown.cs +++ b/tests/StackExchange.Redis.Tests/ConnectionShutdown.cs @@ -14,7 +14,7 @@ public ConnectionShutdown(ITestOutputHelper output) : base(output) { } [Fact(Skip = "Unfriendly")] public async Task ShutdownRaisesConnectionFailedAndRestore() { - using (var conn = Create(allowAdmin: true)) + using (var conn = Create(allowAdmin: true, shared: false)) { int failed = 0, restored = 0; Stopwatch watch = Stopwatch.StartNew(); From 377c813ebd2108add69f225b5469b42b4029f8d5 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 10 Jan 2022 21:34:29 -0500 Subject: [PATCH 052/117] Compensate for delay removal This awaits the condition, rather than a magical delay previously. 
--- tests/StackExchange.Redis.Tests/PubSub.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 224c4bef8..a96275254 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -93,6 +93,7 @@ public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string br await PingAsync(muxer, pub, sub, 3).ForAwait(); + await UntilCondition(TimeSpan.FromSeconds(5), () => received.Count == 1); lock (received) { Assert.Single(received); @@ -221,6 +222,7 @@ public async Task TestPatternPubSub() var count = sub.Publish("abc", "def"); await PingAsync(muxer, pub, sub).ForAwait(); + await UntilCondition(TimeSpan.FromSeconds(5), () => received.Count == 1); lock (received) { Assert.Single(received); From d9c68e1fc5080b835d8ff7b9fc318795410a6df7 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 10 Jan 2022 21:44:43 -0500 Subject: [PATCH 053/117] Add logging to pubsub methods --- tests/StackExchange.Redis.Tests/PubSub.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index a96275254..cbf80b851 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -57,7 +57,7 @@ await UntilCondition(TimeSpan.FromSeconds(10), [InlineData("Foo:", true, "f")] public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string breaker) { - using (var muxer = Create(channelPrefix: channelPrefix)) + using (var muxer = Create(channelPrefix: channelPrefix, log: Writer)) { var pub = GetAnyMaster(muxer); var sub = muxer.GetSubscriber(); @@ -127,7 +127,7 @@ public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string br [Fact] public async Task TestBasicPubSubFireAndForget() { - using (var muxer = Create()) + using (var muxer = Create(log: Writer)) { var pub = 
GetAnyMaster(muxer); var sub = muxer.GetSubscriber(); From 3f6e03043f37483a23fe15c535d115283e0d6855 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 10 Jan 2022 22:07:47 -0500 Subject: [PATCH 054/117] Add logging to PubSubGetAllCorrectOrder --- tests/StackExchange.Redis.Tests/PubSub.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index cbf80b851..646041055 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -351,7 +351,7 @@ await sub.SubscribeAsync(channel, (_, val) => [Fact] public async Task PubSubGetAllCorrectOrder() { - using (var muxer = Create(configuration: TestConfig.Current.RemoteServerAndPort, syncTimeout: 20000)) + using (var muxer = Create(configuration: TestConfig.Current.RemoteServerAndPort, syncTimeout: 20000, log: Writer)) { var sub = muxer.GetSubscriber(); RedisChannel channel = Me(); From b63648aa686ce30d6299455652c70d948fe279b5 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 10 Jan 2022 22:38:09 -0500 Subject: [PATCH 055/117] Tidy exception messages --- tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs index e1d388012..bbe3a0892 100644 --- a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs +++ b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs @@ -110,12 +110,12 @@ public async Task Issue922_ReconnectRaised() muxer.ConnectionFailed += (s, e) => { Interlocked.Increment(ref failCount); - Log($"Connection Failed ({e.ConnectionType},{e.FailureType}): {e.Exception}"); + Log($"Connection Failed ({e.ConnectionType}, {e.FailureType}): {e.Exception}"); }; muxer.ConnectionRestored += (s, e) => { Interlocked.Increment(ref restoreCount); - Log($"Connection Restored 
({e.ConnectionType},{e.FailureType}): {e.Exception}"); + Log($"Connection Restored ({e.ConnectionType}, {e.FailureType})"); }; muxer.GetDatabase(); From 25a705823f593594753b2699e7a38e150ce4633f Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 11 Jan 2022 20:41:03 -0500 Subject: [PATCH 056/117] Fix stupid --- tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs index 0ecca0b63..1b33b1918 100644 --- a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs +++ b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs @@ -116,7 +116,7 @@ public async Task Issue922_ReconnectRaised() muxer.ConnectionRestored += (s, e) => { Interlocked.Increment(ref restoreCount); - Log($"Connection Failed ({e.ConnectionType},{e.FailureType}): {e.Exception}"); + Log($"Connection Restored ({e.ConnectionType},{e.FailureType}): {e.Exception}"); }; muxer.GetDatabase(); From a38fac250e62dde87c2b60a7c7c27419e0e8a009 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 11 Jan 2022 21:04:20 -0500 Subject: [PATCH 057/117] Remove unneeded retry change --- tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs b/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs index 0bf7e670c..57cea491e 100644 --- a/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs +++ b/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs @@ -152,7 +152,6 @@ public void NoConnectionException(bool abortOnConnect, int connCount, int comple { AbortOnConnectFail = abortOnConnect, BacklogPolicy = BacklogPolicy.FailFast, - ConnectRetry = 1, ConnectTimeout = 500, SyncTimeout = 500, KeepAlive = 5000 From 148c9752ea4a5fe55f8a0e41c8ca19990952429e Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Wed, 12 Jan 2022 
16:15:45 -0500 Subject: [PATCH 058/117] Eliminate writer here --- tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs index bbe3a0892..5042d51a8 100644 --- a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs +++ b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs @@ -105,7 +105,7 @@ public async Task Issue922_ReconnectRaised() int failCount = 0, restoreCount = 0; - using (var muxer = ConnectionMultiplexer.Connect(config, log: Writer)) + using (var muxer = ConnectionMultiplexer.Connect(config)) { muxer.ConnectionFailed += (s, e) => { From bf9fa07ad5d8cf0f28a9b0aa5069ffc9534886cf Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Fri, 14 Jan 2022 22:02:48 -0500 Subject: [PATCH 059/117] Writer: switch back to SemaphoreSlim Since SemaphoreSlim has been fixed on all the platforms we're building for these days, this tests moving back. Getting some test run comparison data, but all synthetic benchmarks are looking good. 
See https://github.com/mgravell/Pipelines.Sockets.Unofficial/issues/64 for details --- src/StackExchange.Redis/PhysicalBridge.cs | 114 ++++++++++++---------- 1 file changed, 62 insertions(+), 52 deletions(-) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index f14cf7b10..1cf23da25 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -7,11 +7,8 @@ using System.Threading; using System.Threading.Channels; using System.Threading.Tasks; -using Pipelines.Sockets.Unofficial.Threading; -using static Pipelines.Sockets.Unofficial.Threading.MutexSlim; using static StackExchange.Redis.ConnectionMultiplexer; using PendingSubscriptionState = global::StackExchange.Redis.ConnectionMultiplexer.Subscription.PendingSubscriptionState; - namespace StackExchange.Redis { internal sealed class PhysicalBridge : IDisposable @@ -54,7 +51,6 @@ public PhysicalBridge(ServerEndPoint serverEndPoint, ConnectionType type, int ti Multiplexer = serverEndPoint.Multiplexer; Name = Format.ToString(serverEndPoint.EndPoint) + "/" + ConnectionType.ToString(); TimeoutMilliseconds = timeoutMilliseconds; - _singleWriterMutex = new MutexSlim(timeoutMilliseconds: timeoutMilliseconds); } private readonly int TimeoutMilliseconds; @@ -316,7 +312,7 @@ internal readonly struct BridgeStatus internal BridgeStatus GetStatus() => new() { MessagesSinceLastHeartbeat = (int)(Interlocked.Read(ref operationCount) - Interlocked.Read(ref profileLastLog)), - IsWriterActive = !_singleWriterMutex.IsAvailable, + IsWriterActive = _singleWriterMutex.CurrentCount == 0, BacklogMessagesPending = _backlog.Count, BacklogStatus = _backlogStatus, Connection = physical?.GetStatus() ?? 
PhysicalConnection.ConnectionStatus.Default, @@ -638,7 +634,7 @@ internal bool TryEnqueue(List messages, bool isReplica) return true; } - private readonly MutexSlim _singleWriterMutex; + private readonly SemaphoreSlim _singleWriterMutex = new(1,1); private Message _activeMessage; @@ -721,11 +717,11 @@ internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical return WriteResult.Success; // queued counts as success } - LockToken token = default; + bool gotLock = false; try { - token = _singleWriterMutex.TryWait(WaitOptions.NoDelay); - if (!token.Success) + gotLock = _singleWriterMutex.Wait(0); + if (!gotLock) { // we can't get it *instantaneously*; is there // perhaps a backlog and active backlog processor? @@ -734,8 +730,8 @@ internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical // no backlog... try to wait with the timeout; // if we *still* can't get it: that counts as // an actual timeout - token = _singleWriterMutex.TryWait(); - if (!token.Success) return TimedOutBeforeWrite(message); + gotLock = _singleWriterMutex.Wait(TimeoutMilliseconds); + if (!gotLock) return TimedOutBeforeWrite(message); } var result = WriteMessageInsideLock(physical, message); @@ -752,7 +748,10 @@ internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical finally { UnmarkActiveMessage(message); - token.Dispose(); + if (gotLock) + { + _singleWriterMutex.Release(); + } } } @@ -855,7 +854,7 @@ internal enum BacklogStatus : byte private volatile BacklogStatus _backlogStatus; private async Task ProcessBacklogAsync() { - LockToken token = default; + bool gotLock = false; try { #if DEBUG @@ -870,8 +869,8 @@ private async Task ProcessBacklogAsync() if (_backlog.IsEmpty) return; // nothing to do // try and get the lock; if unsuccessful, retry - token = await _singleWriterMutex.TryWaitAsync().ConfigureAwait(false); - if (token.Success) break; // got the lock; now go do something with it + gotLock = await 
_singleWriterMutex.WaitAsync(TimeoutMilliseconds).ConfigureAwait(false); + if (gotLock) break; // got the lock; now go do something with it #if DEBUG failureCount++; @@ -953,8 +952,11 @@ private async Task ProcessBacklogAsync() _backlogStatus = BacklogStatus.Faulted; } finally - { - token.Dispose(); + { + if (gotLock) + { + _singleWriterMutex.Release(); + } // Do this in finally block, so that thread aborts can't convince us the backlog processor is running forever if (Interlocked.CompareExchange(ref _backlogProcessorIsRunning, 0, 1) != 1) @@ -987,7 +989,7 @@ private WriteResult TimedOutBeforeWrite(Message message) /// /// This writes a message to the output stream /// - /// The phsyical connection to write to. + /// The physical connection to write to. /// The message to be written. internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnection physical, Message message) { @@ -1016,13 +1018,13 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect bool releaseLock = true; // fine to default to true, as it doesn't matter until token is a "success" int lockTaken = 0; - LockToken token = default; + bool gotLock = false; try { // try to acquire it synchronously // note: timeout is specified in mutex-constructor - token = _singleWriterMutex.TryWait(options: WaitOptions.NoDelay); - if (!token.Success) + gotLock = _singleWriterMutex.Wait(0); + if (!gotLock) { // we can't get it *instantaneously*; is there // perhaps a backlog and active backlog processor? @@ -1032,11 +1034,11 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect // no backlog... 
try to wait with the timeout; // if we *still* can't get it: that counts as // an actual timeout - var pending = _singleWriterMutex.TryWaitAsync(options: WaitOptions.DisableAsyncContext); - if (!pending.IsCompletedSuccessfully) return WriteMessageTakingWriteLockAsync_Awaited(pending, physical, message); + var pending = _singleWriterMutex.WaitAsync(TimeoutMilliseconds); + if (pending.Status != TaskStatus.RanToCompletion) return WriteMessageTakingWriteLockAsync_Awaited(pending, physical, message); - token = pending.Result; // fine since we know we got a result - if (!token.Success) return new ValueTask(TimedOutBeforeWrite(message)); + gotLock = pending.Result; // fine since we know we got a result + if (!gotLock) return new ValueTask(TimedOutBeforeWrite(message)); } lockTaken = Environment.TickCount; @@ -1048,7 +1050,7 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect if (!flush.IsCompletedSuccessfully) { releaseLock = false; // so we don't release prematurely - return CompleteWriteAndReleaseLockAsync(token, flush, message, lockTaken); + return CompleteWriteAndReleaseLockAsync(flush, message, lockTaken); } result = flush.Result; // we know it was completed, this is fine @@ -1061,7 +1063,7 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect catch (Exception ex) { return new ValueTask(HandleWriteException(message, ex)); } finally { - if (token.Success) + if (gotLock) { UnmarkActiveMessage(message); @@ -1070,11 +1072,12 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect #if DEBUG RecordLockDuration(lockTaken); #endif - token.Dispose(); + _singleWriterMutex.Release(); } } } } + #if DEBUG private void RecordLockDuration(int lockTaken) { @@ -1084,30 +1087,29 @@ private void RecordLockDuration(int lockTaken) volatile int _maxLockDuration = -1; #endif - private async ValueTask WriteMessageTakingWriteLockAsync_Awaited(ValueTask pending, PhysicalConnection physical, Message message) + private async ValueTask 
WriteMessageTakingWriteLockAsync_Awaited(Task pending, PhysicalConnection physical, Message message) { + bool gotLock = false; try { - using (var token = await pending.ForAwait()) - { - if (!token.Success) return TimedOutBeforeWrite(message); + gotLock = await pending.ForAwait(); + if (!gotLock) return TimedOutBeforeWrite(message); #if DEBUG - int lockTaken = Environment.TickCount; + int lockTaken = Environment.TickCount; #endif - var result = WriteMessageInsideLock(physical, message); + var result = WriteMessageInsideLock(physical, message); - if (result == WriteResult.Success) - { - result = await physical.FlushAsync(false).ForAwait(); - } + if (result == WriteResult.Success) + { + result = await physical.FlushAsync(false).ForAwait(); + } - physical.SetIdle(); + physical.SetIdle(); #if DEBUG - RecordLockDuration(lockTaken); + RecordLockDuration(lockTaken); #endif - return result; - } + return result; } catch (Exception ex) { @@ -1116,23 +1118,31 @@ private async ValueTask WriteMessageTakingWriteLockAsync_Awaited(Va finally { UnmarkActiveMessage(message); + if (gotLock) + { + _singleWriterMutex.Release(); + } } } - private async ValueTask CompleteWriteAndReleaseLockAsync(LockToken lockToken, ValueTask flush, Message message, int lockTaken) + private async ValueTask CompleteWriteAndReleaseLockAsync(ValueTask flush, Message message, int lockTaken) { - using (lockToken) + try + { + var result = await flush.ForAwait(); + physical.SetIdle(); + return result; + } + catch (Exception ex) + { + return HandleWriteException(message, ex); + } + finally { - try - { - var result = await flush.ForAwait(); - physical.SetIdle(); - return result; - } - catch (Exception ex) { return HandleWriteException(message, ex); } #if DEBUG - finally { RecordLockDuration(lockTaken); } + RecordLockDuration(lockTaken); #endif + _singleWriterMutex.Release(); } } From e4e6d735a31f7b0f1a1b30df1c1f4371c1b4cfd6 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sun, 16 Jan 2022 16:00:05 -0500 Subject: 
[PATCH 060/117] Ignore message ordering on the hot paths Ordering is now handled by the backlog itself kicking in when optimal - we were taking a ~10% perf hit for the additional ordering check with these in place. --- src/StackExchange.Redis/PhysicalBridge.cs | 33 ++--------------------- 1 file changed, 2 insertions(+), 31 deletions(-) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index d3ff708c2..e3cb9b425 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -776,14 +776,6 @@ internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical Trace("Writing: " + message); message.SetEnqueued(physical); // this also records the read/write stats at this point - // AVOID REORDERING MESSAGES - // Prefer to add it to the backlog if this thread can see that there might already be a message backlog. - // We do this before attempting to take the write lock, because we won't actually write, we'll just let the backlog get processed in due course - if (TryPushToBacklog(message, onlyIfExists: true)) - { - return WriteResult.Success; // queued counts as success - } - LockToken token = default; try { @@ -1089,29 +1081,9 @@ private WriteResult TimedOutBeforeWrite(Message message) /// Whether this message should bypass the backlog, going straight to the pipe or failing. 
internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnection physical, Message message, bool bypassBacklog = false) { - /* design decision/choice; the code works fine either way, but if this is - * set to *true*, then when we can't take the writer-lock *right away*, - * we push the message to the backlog (starting a worker if needed) - * - * otherwise, we go for a TryWaitAsync and rely on the await machinery - * - * "true" seems to give faster times *when under heavy contention*, based on profiling - * but it involves the backlog concept; "false" works well under low contention, and - * makes more use of async - */ - const bool ALWAYS_USE_BACKLOG_IF_CANNOT_GET_SYNC_LOCK = true; - Trace("Writing: " + message); message.SetEnqueued(physical); // this also records the read/write stats at this point - // AVOID REORDERING MESSAGES - // Prefer to add it to the backlog if this thread can see that there might already be a message backlog. - // We do this before attempting to take the write lock, because we won't actually write, we'll just let the backlog get processed in due course - if (TryPushToBacklog(message, onlyIfExists: physical.HasOutputPipe, bypassBacklog: bypassBacklog)) - { - return new ValueTask(WriteResult.Success); // queued counts as success - } - bool releaseLock = true; // fine to default to true, as it doesn't matter until token is a "success" int lockTaken = 0; LockToken token = default; @@ -1122,9 +1094,8 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect token = _singleWriterMutex.TryWait(options: WaitOptions.NoDelay); if (!token.Success) { - // we can't get it *instantaneously*; is there - // perhaps a backlog and active backlog processor? 
- if (TryPushToBacklog(message, onlyIfExists: !ALWAYS_USE_BACKLOG_IF_CANNOT_GET_SYNC_LOCK, bypassBacklog: bypassBacklog)) + // If we can't get it *instantaneously*; pass it to the backlog for throughput + if (TryPushToBacklog(message, onlyIfExists: false, bypassBacklog: bypassBacklog)) return new ValueTask(WriteResult.Success); // queued counts as success // no backlog... try to wait with the timeout; From 62c6b7dc1df4f62ab0ea2ffb9007ccb39bd77bb4 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sun, 16 Jan 2022 16:44:46 -0500 Subject: [PATCH 061/117] Change up Issue 922 for better reporting and accuracy --- .../ConnectingFailDetection.cs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs index 1b33b1918..31a1a34a9 100644 --- a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs +++ b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs @@ -124,12 +124,16 @@ public async Task Issue922_ReconnectRaised() Assert.Equal(0, Volatile.Read(ref restoreCount)); var server = muxer.GetServer(TestConfig.Current.MasterServerAndPort); - server.SimulateConnectionFailure(SimulatedFailureType.InteractiveInbound | SimulatedFailureType.InteractiveOutbound); + server.SimulateConnectionFailure(SimulatedFailureType.All); + + await UntilCondition(TimeSpan.FromSeconds(10), () => Volatile.Read(ref failCount) >= 2 && Volatile.Read(ref restoreCount) >= 2); - await UntilCondition(TimeSpan.FromSeconds(10), () => Volatile.Read(ref failCount) + Volatile.Read(ref restoreCount) == 4); // interactive+subscriber = 2 - Assert.Equal(2, Volatile.Read(ref failCount)); - Assert.Equal(2, Volatile.Read(ref restoreCount)); + var failCountSnapshot = Volatile.Read(ref failCount); + Assert.True(failCountSnapshot >= 2, $"failCount {failCountSnapshot} >= 2"); + + var restoreCountSnapshot = Volatile.Read(ref restoreCount); + Assert.True(restoreCountSnapshot 
>= 2, $"restoreCount ({restoreCountSnapshot}) >= 2"); } } From 532c01fcc428063eba587a27bf5c340f8bd2080c Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 17 Jan 2022 15:48:33 -0500 Subject: [PATCH 062/117] Move to Task.Run() for .NET 6, cleanup, and backlog queue contention reduction --- .../ConnectionMultiplexer.cs | 16 +- .../Maintenance/ServerMaintenanceEvent.cs | 3 +- src/StackExchange.Redis/PhysicalBridge.cs | 148 ++++++++++-------- src/StackExchange.Redis/PhysicalConnection.cs | 3 + .../Profiling/ProfiledCommand.cs | 9 +- .../SharedConnectionFixture.cs | 16 +- tests/StackExchange.Redis.Tests/TestBase.cs | 3 +- 7 files changed, 111 insertions(+), 87 deletions(-) diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index d05ce03fd..9bbd29b5e 100644 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -152,7 +152,7 @@ private static string GetDefaultClientName() } /// - /// Tries to get the Roleinstance Id if Microsoft.WindowsAzure.ServiceRuntime is loaded. + /// Tries to get the RoleInstance Id if Microsoft.WindowsAzure.ServiceRuntime is loaded. /// In case of any failure, swallows the exception and returns null /// internal static string TryGetAzureRoleInstanceIdNoThrow() @@ -464,7 +464,7 @@ internal void MakeMaster(ServerEndPoint server, ReplicationChangeOptions options // Try and broadcast the fact a change happened to all members // We want everyone possible to pick it up. // We broadcast before *and after* the change to remote members, so that they don't go without detecting a change happened. - // This eliminates the race of pub/sub *then* re-slaving happening, since a method both preceeds and follows. + // This eliminates the race of pub/sub *then* re-slaving happening, since a method both precedes and follows. 
void Broadcast(ReadOnlySpan serverNodes) { if ((options & ReplicationChangeOptions.Broadcast) != 0 && ConfigurationChangedChannel != null @@ -1672,7 +1672,7 @@ private void ActivateAllServers(LogProxy log) internal async Task ReconfigureAsync(bool first, bool reconfigureAll, LogProxy log, EndPoint blame, string cause, bool publishReconfigure = false, CommandFlags publishReconfigureFlags = CommandFlags.None) { if (_isDisposed) throw new ObjectDisposedException(ToString()); - bool showStats = log is object; + bool showStats = log is not null; bool ranThisCall = false; try @@ -2242,7 +2242,7 @@ private bool PrepareToPushMessageToBridge(Message message, ResultProcessor } } - Trace("Queueing on server: " + message); + Trace("Queuing on server: " + message); return true; } Trace("No server or server unavailable - aborting: " + message); @@ -2836,7 +2836,7 @@ internal T ExecuteSyncImpl(Message message, ResultProcessor processor, Ser if (Monitor.Wait(source, TimeoutMilliseconds)) { - Trace("Timeley response to " + message); + Trace("Timely response to " + message); } else { @@ -2875,10 +2875,8 @@ internal T ExecuteSyncImpl(Message message, ResultProcessor processor, Ser /// /// Obtains the log of unusual busy patterns /// - public string GetStormLog() - { - return Volatile.Read(ref stormLogSnapshot); - } + public string GetStormLog() => Volatile.Read(ref stormLogSnapshot); + /// /// Resets the log of unusual busy patterns /// diff --git a/src/StackExchange.Redis/Maintenance/ServerMaintenanceEvent.cs b/src/StackExchange.Redis/Maintenance/ServerMaintenanceEvent.cs index 46e9dcd46..20246eacb 100644 --- a/src/StackExchange.Redis/Maintenance/ServerMaintenanceEvent.cs +++ b/src/StackExchange.Redis/Maintenance/ServerMaintenanceEvent.cs @@ -46,7 +46,6 @@ internal async static Task AddListenersAsync(ConnectionMultiplexer muxer, LogPro /// /// Returns a string representing the maintenance event with all of its properties. 
/// - public override string ToString() - => RawMessage; + public override string ToString() => RawMessage; } } diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 35037fc7b..6753a203b 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -35,6 +35,7 @@ internal sealed class PhysicalBridge : IDisposable private readonly ConcurrentQueue _backlog = new(); private bool BacklogHasItems => !_backlog.IsEmpty; private int _backlogProcessorIsRunning = 0; + private int _backlogCurrentEnqueued = 0; private long _backlogTotalEnqueued = 0; private int activeWriters = 0; @@ -86,14 +87,7 @@ public enum State : byte public ServerEndPoint ServerEndPoint { get; } - public long SubscriptionCount - { - get - { - var tmp = physical; - return tmp == null ? 0 : physical.SubscriptionCount; - } - } + public long SubscriptionCount => physical?.SubscriptionCount ?? 0; internal State ConnectionState => (State)state; internal bool IsBeating => Interlocked.CompareExchange(ref beating, 0, 0) == 1; @@ -137,32 +131,25 @@ public void ReportNextFailure() private WriteResult QueueOrFailMessage(Message message) { - if (message.IsInternalCall && message.Command != RedisCommand.QUIT) - { - // you can go in the queue, but we won't be starting - // a worker, because the handshake has not completed - message.SetEnqueued(null); - message.SetBacklogState(_backlog.Count, null); - _backlog.Enqueue(message); - Interlocked.Increment(ref _backlogTotalEnqueued); - return WriteResult.Success; // we'll take it... 
- } - else if (Multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) + // If it's an internal call that's not a QUIT + // or we're allowed to queue in general, then queue + if (message.IsInternalCall || Multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) { - message.SetEnqueued(null); - message.SetBacklogState(_backlog.Count, null); - _backlog.Enqueue(message); - Interlocked.Increment(ref _backlogTotalEnqueued); - return WriteResult.Success; // we'll queue for retry here... - } - else - { - // sorry, we're just not ready for you yet; - message.Cancel(); - Multiplexer?.OnMessageFaulted(message, null); - message.Complete(); - return WriteResult.NoConnectionAvailable; + // Let's just never ever queue a QUIT message + if (message.Command != RedisCommand.QUIT) + { + message.SetEnqueued(null); + BacklogEnqueue(message, null); + // Note: we don't start a worker on each message here + return WriteResult.Success; // Successfully queued, so indicate success + } } + + // Anything else goes in the bin - we're just not ready for you yet + message.Cancel(); + Multiplexer?.OnMessageFaulted(message, null); + message.Complete(); + return WriteResult.NoConnectionAvailable; } private WriteResult FailDueToNoConnection(Message message) @@ -302,7 +289,7 @@ private async Task ExecuteSubscriptionLoop() // pushes items that have been enqu { try { - // Treat these commands as background/handshake and do not allow queueing to backlog + // Treat these commands as background/handshake and do not allow queuing to backlog if ((await TryWriteAsync(next.Message, next.IsReplica).ForAwait()) != WriteResult.Success) { next.Abort(); @@ -334,15 +321,19 @@ internal readonly struct BridgeStatus /// public bool IsWriterActive { get; init; } + /// + /// Status of the currently processing backlog, if any. + /// + public BacklogStatus BacklogStatus { get; init; } + /// /// The number of messages that are in the backlog queue (waiting to be sent when the connection is healthy again). 
/// public int BacklogMessagesPending { get; init; } /// - /// Status of the currently processing backlog, if any. + /// The number of messages that are in the backlog queue (waiting to be sent when the connection is healthy again). /// - public BacklogStatus BacklogStatus { get; init; } - + public int BacklogMessagesPendingCounter { get; init; } /// /// The number of messages ever added to the backlog queue in the life of this connection. /// @@ -357,6 +348,9 @@ internal readonly struct BridgeStatus /// The default bridge stats, notable *not* the same as default since initializers don't run. /// public static BridgeStatus Zero { get; } = new() { Connection = PhysicalConnection.ConnectionStatus.Zero }; + + public override string ToString() => + $"MessagesSinceLastHeartbeat: {MessagesSinceLastHeartbeat}, Writer: {(IsWriterActive ? "Active" : "Inactive")}, BacklogStatus: {BacklogStatus}, BacklogMessagesPending: (Queue: {BacklogMessagesPending}, Counter: {BacklogMessagesPendingCounter}), TotalBacklogMessagesQueued: {TotalBacklogMessagesQueued}, Connection: ({Connection})"; } internal BridgeStatus GetStatus() => new() @@ -364,6 +358,7 @@ internal readonly struct BridgeStatus MessagesSinceLastHeartbeat = (int)(Interlocked.Read(ref operationCount) - Interlocked.Read(ref profileLastLog)), IsWriterActive = _singleWriterMutex.CurrentCount == 0, BacklogMessagesPending = _backlog.Count, + BacklogMessagesPendingCounter = Volatile.Read(ref _backlogCurrentEnqueued), BacklogStatus = _backlogStatus, TotalBacklogMessagesQueued = _backlogTotalEnqueued, Connection = physical?.GetStatus() ?? 
PhysicalConnection.ConnectionStatus.Default, @@ -516,7 +511,7 @@ internal void OnDisconnected(ConnectionFailureType failureType, PhysicalConnecti private void AbandonPendingBacklog(Exception ex) { - while (_backlog.TryDequeue(out Message next)) + while (BacklogTryDequeue(out Message next)) { Multiplexer?.OnMessageFaulted(next, ex); next.SetExceptionAndComplete(ex, this); @@ -535,7 +530,10 @@ internal void OnFullyEstablished(PhysicalConnection connection, string source) ServerEndPoint.OnFullyEstablished(connection, source); // do we have pending system things to do? - if (BacklogHasItems) StartBacklogProcessor(); + if (BacklogHasItems) + { + StartBacklogProcessor(); + } if (ConnectionType == ConnectionType.Interactive) ServerEndPoint.CheckInfoReplication(); } @@ -655,16 +653,11 @@ internal void OnHeartbeat(bool ifConnectedOnly) } } - internal void RemovePhysical(PhysicalConnection connection) - { + internal void RemovePhysical(PhysicalConnection connection) => Interlocked.CompareExchange(ref physical, null, connection); - } [Conditional("VERBOSE")] - internal void Trace(string message) - { - Multiplexer.Trace(message, ToString()); - } + internal void Trace(string message) => Multiplexer.Trace(message, ToString()); [Conditional("VERBOSE")] internal void Trace(bool condition, string message) @@ -686,7 +679,8 @@ internal bool TryEnqueue(List messages, bool isReplica) var physical = this.physical; if (physical == null) return false; foreach (var message in messages) - { // deliberately not taking a single lock here; we don't care if + { + // deliberately not taking a single lock here; we don't care if // other threads manage to interleave - in fact, it would be desirable // (to avoid a batch monopolising the connection) #pragma warning disable CS0618 @@ -824,7 +818,7 @@ internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical } } - //[MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] private 
bool TryPushToBacklog(Message message, bool onlyIfExists, bool bypassBacklog = false) { // In the handshake case: send the command directly through. @@ -842,17 +836,12 @@ private bool TryPushToBacklog(Message message, bool onlyIfExists, bool bypassBac // we only need care if WE are able to // see the queue when its empty. Not whether anyone else sees it as empty. // So strong synchronization is not required. - if (onlyIfExists && _backlog.IsEmpty) + if (onlyIfExists && Volatile.Read(ref _backlogCurrentEnqueued) == 0) { return false; } -#if DEBUG // Don't eat count cost in .Release - int count = _backlog.Count; - message.SetBacklogState(count, physical); -#endif - _backlog.Enqueue(message); - Interlocked.Increment(ref _backlogTotalEnqueued); + BacklogEnqueue(message, physical); // The correct way to decide to start backlog process is not based on previously empty // but based on a) not empty now (we enqueued!) and b) no backlog processor already running. @@ -861,6 +850,28 @@ private bool TryPushToBacklog(Message message, bool onlyIfExists, bool bypassBac return true; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void BacklogEnqueue(Message message, PhysicalConnection physical) + { + // Increment first to ensure we over-trigger backlog processing rather than under-trigger if anything + var position = Interlocked.Increment(ref _backlogCurrentEnqueued); + message.SetBacklogState(position, physical); + + _backlog.Enqueue(message); + Interlocked.Increment(ref _backlogTotalEnqueued); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool BacklogTryDequeue(out Message message) + { + if (_backlog.TryDequeue(out message)) + { + Interlocked.Decrement(ref _backlogCurrentEnqueued); + return true; + } + return false; + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private void StartBacklogProcessor() { @@ -871,6 +882,10 @@ private void StartBacklogProcessor() #endif _backlogStatus = BacklogStatus.Activating; +#if NET6_0_OR_GREATER + 
// In .NET 6, use the thread pool stall semantics to our advantage and use a lighter-weight Task + Task.Run(ProcessBacklogsAsync); +#else // Start the backlog processor; this is a bit unorthodox, as you would *expect* this to just // be Task.Run; that would work fine when healthy, but when we're falling on our face, it is // easy to get into a thread-pool-starvation "spiral of death" if we rely on the thread-pool @@ -883,6 +898,7 @@ private void StartBacklogProcessor() Name = "StackExchange.Redis Backlog", // help anyone looking at thread-dumps }; thread.Start(this); +#endif } } #if DEBUG @@ -893,7 +909,7 @@ private void StartBacklogProcessor() /// Crawls from the head of the backlog queue, consuming anything that should have timed out /// and pruning it accordingly (these messages will get timeout exceptions). /// - private void CheckBacklogForTimeouts(ConcurrentQueue backlog) + private void CheckBacklogForTimeouts() { var now = Environment.TickCount; var timeout = TimeoutMilliseconds; @@ -905,15 +921,15 @@ private void CheckBacklogForTimeouts(ConcurrentQueue backlog) { // See if the message has pass our async timeout threshold // or has otherwise been completed (e.g. a sync wait timed out) which would have cleared the ResultBox - if (message.HasTimedOut(now, timeout, out var _) || message.ResultBox == null) break; // not a timeout - we can stop looking - lock (backlog) + if (!message.HasTimedOut(now, timeout, out var _) || message.ResultBox == null) break; // not a timeout - we can stop looking + lock (_backlog) { // Peek again since we didn't have lock before... 
// and rerun the exact same checks as above, note that it may be a different message now - if (!backlog.TryPeek(out message)) break; + if (!_backlog.TryPeek(out message)) break; if (!message.HasTimedOut(now, timeout, out var _) && message.ResultBox != null) break; - if (!backlog.TryDequeue(out var message2) || (message != message2)) // consume it for real + if (!BacklogTryDequeue(out var message2) || (message != message2)) // consume it for real { throw new RedisException("Thread safety bug detected! A queue message disappeared while we had the backlog lock"); } @@ -960,7 +976,7 @@ private async Task ProcessBacklogsAsync() // TODO: vNext handoff this backlog to another primary ("can handle everything") connection // and remove any per-server commands. This means we need to track a bit of whether something // was server-endpoint-specific in PrepareToPushMessageToBridge (was the server ref null or not) - await ProcessBridgeBacklogAsync(_backlog); // Needs handoff + await ProcessBridgeBacklogAsync(); // Needs handoff } } catch @@ -989,7 +1005,7 @@ private async Task ProcessBacklogsAsync() } } - private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog) + private async Task ProcessBridgeBacklogAsync() { // Importantly: don't assume we have a physical connection here // We are very likely to hit a state where it's not re-established or even referenced here @@ -1005,14 +1021,14 @@ private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog) // First eliminate any messages that have timed out already. 
_backlogStatus = BacklogStatus.CheckingForTimeout; - CheckBacklogForTimeouts(backlog); + CheckBacklogForTimeouts(); _backlogStatus = BacklogStatus.CheckingForTimeoutComplete; // For the rest of the backlog, if we're not connected there's no point - abort out while (IsConnected) { // check whether the backlog is empty *before* even trying to get the lock - if (backlog.IsEmpty) return; // nothing to do + if (_backlog.IsEmpty) return; // nothing to do // try and get the lock; if unsuccessful, retry gotLock = await _singleWriterMutex.WaitAsync(TimeoutMilliseconds).ConfigureAwait(false); @@ -1035,13 +1051,11 @@ private async Task ProcessBridgeBacklogAsync(ConcurrentQueue backlog) Message message; _backlogStatus = BacklogStatus.CheckingForWork; - // We need to lock _backlog when dequeuing because of - // races with timeout processing logic (e.g. next heartbeat hitting us - lock (backlog) + lock (_backlog) { // Note that we're actively taking it off the queue here, not peeking // If there's nothing left in queue, we're done. - if (!backlog.TryDequeue(out message)) break; + if (!BacklogTryDequeue(out message)) break; } try diff --git a/src/StackExchange.Redis/PhysicalConnection.cs b/src/StackExchange.Redis/PhysicalConnection.cs index bf37905ee..3f796646f 100644 --- a/src/StackExchange.Redis/PhysicalConnection.cs +++ b/src/StackExchange.Redis/PhysicalConnection.cs @@ -1311,6 +1311,9 @@ internal readonly struct ConnectionStatus /// public WriteStatus WriteStatus { get; init; } + public override string ToString() => + $"SentAwaitingResponse: {MessagesSentAwaitingResponse}, AvailableOnSocket: {BytesAvailableOnSocket} byte(s), InReadPipe: {BytesInReadPipe} byte(s), InWritePipe: {BytesInWritePipe} byte(s), ReadStatus: {ReadStatus}, WriteStatus: {WriteStatus}"; + /// /// The default connection stats, notable *not* the same as default since initializers don't run. 
/// diff --git a/src/StackExchange.Redis/Profiling/ProfiledCommand.cs b/src/StackExchange.Redis/Profiling/ProfiledCommand.cs index 7d6a8fcfe..5f4ff899f 100644 --- a/src/StackExchange.Redis/Profiling/ProfiledCommand.cs +++ b/src/StackExchange.Redis/Profiling/ProfiledCommand.cs @@ -116,10 +116,8 @@ public void SetCompleted() } } - public override string ToString() - { - return - $@"EndPoint = {EndPoint} + public override string ToString() => +$@"EndPoint = {EndPoint} Db = {Db} Command = {Command} CommandCreated = {CommandCreated:u} @@ -129,7 +127,6 @@ public override string ToString() ResponseToCompletion = {ResponseToCompletion} ElapsedTime = {ElapsedTime} Flags = {Flags} -RetransmissionOf = ({RetransmissionOf})"; - } +RetransmissionOf = ({RetransmissionOf?.ToString() ?? "nothing"})"; } } diff --git a/tests/StackExchange.Redis.Tests/SharedConnectionFixture.cs b/tests/StackExchange.Redis.Tests/SharedConnectionFixture.cs index bf22489dd..a91047be1 100644 --- a/tests/StackExchange.Redis.Tests/SharedConnectionFixture.cs +++ b/tests/StackExchange.Redis.Tests/SharedConnectionFixture.cs @@ -326,8 +326,20 @@ public void Teardown(TextWriter output) } //Assert.True(false, $"There were {privateFailCount} private ambient exceptions."); } - var pool = SocketManager.Shared?.SchedulerPool; - TestBase.Log(output, $"Service Counts: (Scheduler) By Queue: {pool?.TotalServicedByQueue.ToString()}, By Pool: {pool?.TotalServicedByPool.ToString()}, Workers: {pool?.WorkerCount.ToString()}, Available: {pool?.AvailableCount.ToString()}"); + + if (_actualConnection != null) + { + TestBase.Log(output, "Connection Counts: " + _actualConnection.GetCounters().ToString()); + foreach (var ep in _actualConnection.GetServerSnapshot()) + { + var interactive = ep.GetBridge(ConnectionType.Interactive); + TestBase.Log(output, $" {Format.ToString(interactive)}: " + interactive.GetStatus()); + + var subscription = ep.GetBridge(ConnectionType.Subscription); + TestBase.Log(output, $" 
{Format.ToString(subscription)}: " + subscription.GetStatus()); + } + + } } } diff --git a/tests/StackExchange.Redis.Tests/TestBase.cs b/tests/StackExchange.Redis.Tests/TestBase.cs index 794358028..7df1114a7 100644 --- a/tests/StackExchange.Redis.Tests/TestBase.cs +++ b/tests/StackExchange.Redis.Tests/TestBase.cs @@ -195,7 +195,8 @@ public void Teardown() } Skip.Inconclusive($"There were {privateFailCount} private and {sharedFailCount.Value} ambient exceptions; expected {expectedFailCount}."); } - Log($"Service Counts: (Scheduler) Queue: {SocketManager.Shared?.SchedulerPool?.TotalServicedByQueue.ToString()}, Pool: {SocketManager.Shared?.SchedulerPool?.TotalServicedByPool.ToString()}"); + var pool = SocketManager.Shared?.SchedulerPool; + Log($"Service Counts: (Scheduler) Queue: {pool?.TotalServicedByQueue.ToString()}, Pool: {pool?.TotalServicedByPool.ToString()}, Workers: {pool?.WorkerCount.ToString()}, Available: {pool?.AvailableCount.ToString()}"); } protected IServer GetServer(IConnectionMultiplexer muxer) From bc11b96c9829ac7521fcea85ff7f74abcb1dc4dc Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 18 Jan 2022 09:13:41 -0500 Subject: [PATCH 063/117] Fix merge --- src/StackExchange.Redis/PhysicalBridge.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 03bb8d142..5d379905f 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -857,7 +857,7 @@ private void StartBacklogProcessor() // to unblock the thread-pool when there could be sync-over-async callers. 
Note that in reality, // the initial "enough" of the back-log processor is typically sync, which means that the thread // we start is actually useful, despite thinking "but that will just go async and back to the pool" - var thread = new Thread(s => ((PhysicalBridge)s).ProcessBacklogsAsync().RedisFireAndForget()) + var thread = new Thread(s => ((PhysicalBridge)s).ProcessBacklogAsync().RedisFireAndForget()) { IsBackground = true, // don't keep process alive (also: act like the thread-pool used to) Name = "StackExchange.Redis Backlog", // help anyone looking at thread-dumps @@ -931,7 +931,7 @@ internal enum BacklogStatus : byte /// Process the backlog(s) in play if any. /// This means flushing commands to an available/active connection (if any) or spinning until timeout if not. /// - private async Task ProcessBacklogsAsync() + private async Task ProcessBacklogAsync() { _backlogStatus = BacklogStatus.Starting; try From 47f0a12310e1e19e388e97bf539707820c237d36 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 18 Jan 2022 11:34:40 -0500 Subject: [PATCH 064/117] Format options --- src/StackExchange.Redis/ConfigurationOptions.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/StackExchange.Redis/ConfigurationOptions.cs b/src/StackExchange.Redis/ConfigurationOptions.cs index 2181a9ae4..1888895b4 100644 --- a/src/StackExchange.Redis/ConfigurationOptions.cs +++ b/src/StackExchange.Redis/ConfigurationOptions.cs @@ -377,7 +377,11 @@ public IReconnectRetryPolicy ReconnectRetryPolicy /// /// The backlog policy to be used for commands when a connection is unhealthy. /// - public BacklogPolicy BacklogPolicy { get => backlogPolicy ?? BacklogPolicy.Default; set => backlogPolicy = value; } + public BacklogPolicy BacklogPolicy + { + get => backlogPolicy ?? BacklogPolicy.Default; + set => backlogPolicy = value; + } /// /// Indicates whether endpoints should be resolved via DNS before connecting. 
From 49a917d15739a749e37203a8be24678590f69bdc Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Wed, 19 Jan 2022 09:38:39 -0500 Subject: [PATCH 065/117] Sync tests --- src/StackExchange.Redis/PhysicalBridge.cs | 1 - src/StackExchange.Redis/PhysicalConnection.cs | 2 +- .../StackExchange.Redis.Tests/BacklogTests.cs | 111 +++++++++++++++++- 3 files changed, 109 insertions(+), 5 deletions(-) diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 595d9a30d..5d248a5a0 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -188,7 +188,6 @@ public WriteResult TryWriteSync(Message message, bool isReplica) if (!IsConnected) return QueueOrFailMessage(message); var physical = this.physical; - if (physical == null) return FailDueToNoConnection(message); if (physical == null) { // If we're not connected yet and supposed to, queue it up diff --git a/src/StackExchange.Redis/PhysicalConnection.cs b/src/StackExchange.Redis/PhysicalConnection.cs index 1c6ca81f0..aca7c8f40 100644 --- a/src/StackExchange.Redis/PhysicalConnection.cs +++ b/src/StackExchange.Redis/PhysicalConnection.cs @@ -1597,7 +1597,7 @@ private void OnDebugAbort() var bridge = BridgeCouldBeNull; if (bridge == null || !bridge.Multiplexer.AllowConnect) { - throw new RedisConnectionException(ConnectionFailureType.InternalFailure, "debugging"); + throw new RedisConnectionException(ConnectionFailureType.InternalFailure, "Aborting (AllowConnect: False)"); } } diff --git a/tests/StackExchange.Redis.Tests/BacklogTests.cs b/tests/StackExchange.Redis.Tests/BacklogTests.cs index 7cf470efd..9ff6a2cf3 100644 --- a/tests/StackExchange.Redis.Tests/BacklogTests.cs +++ b/tests/StackExchange.Redis.Tests/BacklogTests.cs @@ -1,4 +1,5 @@ using System; +using System.Threading; using System.Threading.Tasks; using Xunit; using Xunit.Abstractions; @@ -107,7 +108,7 @@ void PrintSnapshot(ConnectionMultiplexer muxer) } [Fact] - public async Task 
QueuesAndFlushesAfterReconnecting() + public async Task QueuesAndFlushesAfterReconnectingAsync() { try { @@ -126,8 +127,10 @@ public async Task QueuesAndFlushesAfterReconnecting() options.EndPoints.Add(TestConfig.Current.MasterServerAndPort); using var muxer = await ConnectionMultiplexer.ConnectAsync(options, Writer); - muxer.ErrorMessage += (s, e) => Writer.WriteLine($"Error Message {e.EndPoint}: {e.Message}"); - muxer.InternalError += (s, e) => Writer.WriteLine($"Internal Error {e.EndPoint}: {e.Exception.Message}"); + muxer.ErrorMessage += (s, e) => Log($"Error Message {e.EndPoint}: {e.Message}"); + muxer.InternalError += (s, e) => Log($"Internal Error {e.EndPoint}: {e.Exception.Message}"); + muxer.ConnectionFailed += (s, a) => Log("Disconnected: " + EndPointCollection.ToString(a.EndPoint)); + muxer.ConnectionRestored += (s, a) => Log("Reconnected: " + EndPointCollection.ToString(a.EndPoint)); var db = muxer.GetDatabase(); Writer.WriteLine("Test: Initial (connected) ping"); @@ -195,5 +198,107 @@ public async Task QueuesAndFlushesAfterReconnecting() ClearAmbientFailures(); } } + + + [Fact] + public async Task QueuesAndFlushesAfterReconnecting() + { + try + { + var options = new ConfigurationOptions() + { + BacklogPolicy = BacklogPolicy.Default, + AbortOnConnectFail = false, + ConnectTimeout = 1000, + ConnectRetry = 2, + SyncTimeout = 10000, + KeepAlive = 10000, + AsyncTimeout = 5000, + AllowAdmin = true, + SocketManager = SocketManager.ThreadPool, + }; + options.EndPoints.Add(TestConfig.Current.MasterServerAndPort); + + using var muxer = await ConnectionMultiplexer.ConnectAsync(options, Writer); + muxer.ErrorMessage += (s, e) => Log($"Error Message {e.EndPoint}: {e.Message}"); + muxer.InternalError += (s, e) => Log($"Internal Error {e.EndPoint}: {e.Exception.Message}"); + muxer.ConnectionFailed += (s, a) => Log("Disconnected: " + EndPointCollection.ToString(a.EndPoint)); + muxer.ConnectionRestored += (s, a) => Log("Reconnected: " + 
EndPointCollection.ToString(a.EndPoint)); + + var db = muxer.GetDatabase(); + Writer.WriteLine("Test: Initial (connected) ping"); + await db.PingAsync(); + + var server = muxer.GetServerSnapshot()[0]; + var stats = server.GetBridgeStatus(RedisCommand.PING); + Assert.Equal(0, stats.BacklogMessagesPending); // Everything's normal + + // Fail the connection + Writer.WriteLine("Test: Simulating failure"); + muxer.AllowConnect = false; + server.SimulateConnectionFailure(SimulatedFailureType.All); + Assert.False(muxer.IsConnected); + + // Queue up some commands + Writer.WriteLine("Test: Disconnected pings"); + + Task[] pings = new Task[3]; + pings[0] = RunBlockingSynchronousWithExtraThreadAsync(() => disconnectedPings(1)); + pings[1] = RunBlockingSynchronousWithExtraThreadAsync(() => disconnectedPings(2)); + pings[2] = RunBlockingSynchronousWithExtraThreadAsync(() => disconnectedPings(3)); + void disconnectedPings(int id) + { + // No need to delay, we're going to try a disconnected connection immediately so it'll fail... 
+ Log($"Pinging (disconnected - {id})"); + var result = db.Ping(); + Log($"Pinging (disconnected - {id}) - result: " + result); + } + Writer.WriteLine("Test: Disconnected pings issued"); + + Assert.False(muxer.IsConnected); + // Give the tasks time to queue + await UntilCondition(TimeSpan.FromSeconds(5), () => server.GetBridgeStatus(RedisCommand.PING).BacklogMessagesPending >= 3); + + var disconnectedStats = server.GetBridgeStatus(RedisCommand.PING); + Log($"Test Stats: (BacklogMessagesPending: {disconnectedStats.BacklogMessagesPending}, TotalBacklogMessagesQueued: {disconnectedStats.TotalBacklogMessagesQueued})"); + Assert.True(disconnectedStats.BacklogMessagesPending >= 3, $"Expected {nameof(disconnectedStats.BacklogMessagesPending)} > 3, got {disconnectedStats.BacklogMessagesPending}"); + + Writer.WriteLine("Test: Allowing reconnect"); + muxer.AllowConnect = true; + Writer.WriteLine("Test: Awaiting reconnect"); + await UntilCondition(TimeSpan.FromSeconds(3), () => muxer.IsConnected).ForAwait(); + + Writer.WriteLine("Test: Checking reconnected 1"); + Assert.True(muxer.IsConnected); + + var afterConnectedStats = server.GetBridgeStatus(RedisCommand.PING); + Writer.WriteLine($"Test: BacklogStatus: {afterConnectedStats.BacklogStatus}, BacklogMessagesPending: {afterConnectedStats.BacklogMessagesPending}, IsWriterActive: {afterConnectedStats.IsWriterActive}, MessagesSinceLastHeartbeat: {afterConnectedStats.MessagesSinceLastHeartbeat}, TotalBacklogMessagesQueued: {afterConnectedStats.TotalBacklogMessagesQueued}"); + + Writer.WriteLine("Test: Awaiting 3 pings"); + await Task.WhenAll(pings); + + Writer.WriteLine("Test: Checking reconnected 2"); + Assert.True(muxer.IsConnected); + var reconnectedStats = server.GetBridgeStatus(RedisCommand.PING); + Assert.Equal(0, reconnectedStats.BacklogMessagesPending); + + Writer.WriteLine("Test: Pinging again..."); + pings[0] = RunBlockingSynchronousWithExtraThreadAsync(() => disconnectedPings(4)); + pings[1] = 
RunBlockingSynchronousWithExtraThreadAsync(() => disconnectedPings(5)); + pings[2] = RunBlockingSynchronousWithExtraThreadAsync(() => disconnectedPings(6)); + Writer.WriteLine("Test: Last Ping queued"); + + // We should see none queued + Writer.WriteLine("Test: BacklogMessagesPending check"); + Assert.Equal(0, stats.BacklogMessagesPending); + Writer.WriteLine("Test: Awaiting 3 more pings"); + await Task.WhenAll(pings); + Writer.WriteLine("Test: Done"); + } + finally + { + ClearAmbientFailures(); + } + } } } From 8f5bf58357bd13518602ed3a88f3344c8063ff76 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Wed, 19 Jan 2022 13:03:30 -0500 Subject: [PATCH 066/117] Debug: lots of pruning In DEBUG we had lots of additional logging around locks and flushes, but the reality is these are in a slower unoptimized state and heavily influencing the results especially of lock contention themselves. This removes a lot of the debug code we had when first building this around timings. --- src/StackExchange.Redis/ExceptionFactory.cs | 4 - src/StackExchange.Redis/Message.cs | 18 --- src/StackExchange.Redis/PhysicalBridge.cs | 119 ++++-------------- src/StackExchange.Redis/PhysicalConnection.cs | 51 +------- 4 files changed, 28 insertions(+), 164 deletions(-) diff --git a/src/StackExchange.Redis/ExceptionFactory.cs b/src/StackExchange.Redis/ExceptionFactory.cs index 4cc274d24..ca35a16f6 100644 --- a/src/StackExchange.Redis/ExceptionFactory.cs +++ b/src/StackExchange.Redis/ExceptionFactory.cs @@ -244,10 +244,6 @@ internal static Exception Timeout(ConnectionMultiplexer multiplexer, string base Add(data, sb, "Timeout", "timeout", Format.ToString(multiplexer.TimeoutMilliseconds)); try { -#if DEBUG - if (message.QueuePosition >= 0) Add(data, sb, "QueuePosition", null, message.QueuePosition.ToString()); // the position the item was when added to the queue - if ((int)message.ConnectionWriteState >= 0) Add(data, sb, "WriteState", null, message.ConnectionWriteState.ToString()); // what the 
physical was doing when it was added to the queue -#endif if (message != null && message.TryGetPhysicalState(out var ws, out var rs, out var sentDelta, out var receivedDelta)) { Add(data, sb, "Write-State", null, ws.ToString()); diff --git a/src/StackExchange.Redis/Message.cs b/src/StackExchange.Redis/Message.cs index 86336d5c6..89d1dac1d 100644 --- a/src/StackExchange.Redis/Message.cs +++ b/src/StackExchange.Redis/Message.cs @@ -54,20 +54,6 @@ internal abstract class Message : ICompletable { public readonly int Db; -#if DEBUG - internal int QueuePosition { get; private set; } - internal PhysicalConnection.WriteStatus ConnectionWriteState { get; private set; } -#endif - [Conditional("DEBUG")] - [System.Diagnostics.CodeAnalysis.SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "DEBUG uses instance data")] - internal void SetBacklogState(int position, PhysicalConnection physical) - { -#if DEBUG - QueuePosition = position; - ConnectionWriteState = physical?.GetWriteStatus() ?? 
PhysicalConnection.WriteStatus.NA; -#endif - } - internal const CommandFlags InternalCallFlag = (CommandFlags)128; protected RedisCommand command; @@ -601,10 +587,6 @@ internal bool TrySetResult(T value) internal void SetEnqueued(PhysicalConnection connection) { -#if DEBUG - QueuePosition = -1; - ConnectionWriteState = PhysicalConnection.WriteStatus.NA; -#endif SetWriteTime(); performance?.SetEnqueued(); _enqueuedTo = connection; diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 0e25bd189..a9f5b5b22 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -139,7 +139,6 @@ private WriteResult QueueOrFailMessage(Message message) // you can go in the queue, but we won't be starting // a worker, because the handshake has not completed message.SetEnqueued(null); - message.SetBacklogState(_backlog.Count, null); _backlog.Enqueue(message); return WriteResult.Success; // we'll take it... } @@ -659,61 +658,40 @@ private WriteResult WriteMessageInsideLock(PhysicalConnection physical, Message Multiplexer?.OnInfoMessage($"Reentrant call to WriteMessageTakingWriteLock for {message.CommandAndKey}, {existingMessage.CommandAndKey} is still active"); return WriteResult.NoConnectionAvailable; } -#if DEBUG - int startWriteTime = Environment.TickCount; - try -#endif + + physical.SetWriting(); + if (message is IMultiMessage multiMessage) { - physical.SetWriting(); - if (message is IMultiMessage multiMessage) + var messageIsSent = false; + SelectDatabaseInsideWriteLock(physical, message); // need to switch database *before* the transaction + foreach (var subCommand in multiMessage.GetMessages(physical)) { - var messageIsSent = false; - SelectDatabaseInsideWriteLock(physical, message); // need to switch database *before* the transaction - foreach (var subCommand in multiMessage.GetMessages(physical)) + result = WriteMessageToServerInsideWriteLock(physical, subCommand); + if (result != 
WriteResult.Success) { - result = WriteMessageToServerInsideWriteLock(physical, subCommand); - if (result != WriteResult.Success) - { - // we screwed up; abort; note that WriteMessageToServer already - // killed the underlying connection - Trace("Unable to write to server"); - message.Fail(ConnectionFailureType.ProtocolFailure, null, "failure before write: " + result.ToString()); - message.Complete(); - return result; - } - //The parent message (next) may be returned from GetMessages - //and should not be marked as sent again below - messageIsSent = messageIsSent || subCommand == message; + // we screwed up; abort; note that WriteMessageToServer already + // killed the underlying connection + Trace("Unable to write to server"); + message.Fail(ConnectionFailureType.ProtocolFailure, null, "failure before write: " + result.ToString()); + message.Complete(); + return result; } - if (!messageIsSent) - { - message.SetRequestSent(); // well, it was attempted, at least... - } - - return WriteResult.Success; + //The parent message (next) may be returned from GetMessages + //and should not be marked as sent again below + messageIsSent = messageIsSent || subCommand == message; } - else + if (!messageIsSent) { - return WriteMessageToServerInsideWriteLock(physical, message); + message.SetRequestSent(); // well, it was attempted, at least... } + + return WriteResult.Success; } -#if DEBUG - finally + else { - int endWriteTime = Environment.TickCount; - int writeDuration = unchecked(endWriteTime - startWriteTime); - if (writeDuration > _maxWriteTime) - { - _maxWriteTime = writeDuration; - _maxWriteCommand = message?.Command ?? 
default; - } + return WriteMessageToServerInsideWriteLock(physical, message); } -#endif } -#if DEBUG - private volatile int _maxWriteTime = -1; - private RedisCommand _maxWriteCommand; -#endif [Obsolete("prefer async")] internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical, Message message) @@ -796,7 +774,6 @@ private bool PushToBacklog(Message message, bool onlyIfExists) int count = _backlog.Count; - message.SetBacklogState(count, physical); _backlog.Enqueue(message); // The correct way to decide to start backlog process is not based on previously empty @@ -811,9 +788,6 @@ private void StartBacklogProcessor() { if (Interlocked.CompareExchange(ref _backlogProcessorIsRunning, 1, 0) == 0) { -#if DEBUG - _backlogProcessorRequestedTime = Environment.TickCount; -#endif _backlogStatus = BacklogStatus.Activating; #if NET6_0_OR_GREATER @@ -835,9 +809,6 @@ private void StartBacklogProcessor() #endif } } -#if DEBUG - private volatile int _backlogProcessorRequestedTime; -#endif /// /// Crawls from the head of the backlog queue, consuming anything that should have timed out @@ -904,11 +875,6 @@ private async Task ProcessBacklogAsync() #endif try { -#if DEBUG - int tryToAcquireTime = Environment.TickCount; - var msToStartWorker = unchecked(tryToAcquireTime - _backlogProcessorRequestedTime); - int failureCount = 0; -#endif _backlogStatus = BacklogStatus.Starting; while (true) { @@ -923,18 +889,9 @@ private async Task ProcessBacklogAsync() token = await _singleWriterMutex.TryWaitAsync().ConfigureAwait(false); if (token.Success) break; // got the lock; now go do something with it #endif - -#if DEBUG - failureCount++; -#endif } _backlogStatus = BacklogStatus.Started; -#if DEBUG - int acquiredTime = Environment.TickCount; - var msToGetLock = unchecked(acquiredTime - tryToAcquireTime); -#endif - // so now we are the writer; write some things! 
Message message; var timeout = TimeoutMilliseconds; @@ -955,15 +912,6 @@ private async Task ProcessBacklogAsync() { _backlogStatus = BacklogStatus.RecordingTimeout; var ex = Multiplexer.GetException(WriteResult.TimeoutBeforeWrite, message, ServerEndPoint); -#if DEBUG // additional tracking - ex.Data["Redis-BacklogStartDelay"] = msToStartWorker; - ex.Data["Redis-BacklogGetLockDelay"] = msToGetLock; - if (failureCount != 0) ex.Data["Redis-BacklogFailCount"] = failureCount; - if (_maxWriteTime >= 0) ex.Data["Redis-MaxWrite"] = _maxWriteTime.ToString() + "ms, " + _maxWriteCommand.ToString(); - var maxFlush = physical?.MaxFlushTime ?? -1; - if (maxFlush >= 0) ex.Data["Redis-MaxFlush"] = maxFlush.ToString() + "ms, " + (physical?.MaxFlushBytes ?? -1).ToString(); - if (_maxLockDuration >= 0) ex.Data["Redis-MaxLockDuration"] = _maxLockDuration; -#endif message.SetExceptionAndComplete(ex, this); } else @@ -1154,9 +1102,6 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect if (releaseLock) { -#if DEBUG - RecordLockDuration(lockTaken); -#endif #if NETCOREAPP _singleWriterMutex.Release(); #else @@ -1167,15 +1112,6 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect } } -#if DEBUG - private void RecordLockDuration(int lockTaken) - { - var lockDuration = unchecked(Environment.TickCount - lockTaken); - if (lockDuration > _maxLockDuration) _maxLockDuration = lockDuration; - } - volatile int _maxLockDuration = -1; -#endif - private async ValueTask WriteMessageTakingWriteLockAsync_Awaited( #if NETCOREAPP Task pending, @@ -1195,9 +1131,6 @@ private async ValueTask WriteMessageTakingWriteLockAsync_Awaited( if (!gotLock) return TimedOutBeforeWrite(message); #else using var token = await pending.ForAwait(); -#endif -#if DEBUG - int lockTaken = Environment.TickCount; #endif var result = WriteMessageInsideLock(physical, message); @@ -1208,9 +1141,6 @@ private async ValueTask WriteMessageTakingWriteLockAsync_Awaited( physical.SetIdle(); -#if DEBUG - 
RecordLockDuration(lockTaken); -#endif return result; } catch (Exception ex) @@ -1252,9 +1182,6 @@ private async ValueTask CompleteWriteAndReleaseLockAsync( } finally { -#if DEBUG - RecordLockDuration(lockTaken); -#endif #if NETCOREAPP _singleWriterMutex.Release(); #endif diff --git a/src/StackExchange.Redis/PhysicalConnection.cs b/src/StackExchange.Redis/PhysicalConnection.cs index e2cdc9f97..2924368a4 100644 --- a/src/StackExchange.Redis/PhysicalConnection.cs +++ b/src/StackExchange.Redis/PhysicalConnection.cs @@ -890,18 +890,11 @@ internal static int WriteRaw(Span span, long value, bool withLengthPrefix } [System.Diagnostics.CodeAnalysis.SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "DEBUG uses instance data")] - private async ValueTask FlushAsync_Awaited(PhysicalConnection connection, ValueTask flush, bool throwOnFailure -#if DEBUG - , int startFlush, long flushBytes -#endif - ) + private async ValueTask FlushAsync_Awaited(PhysicalConnection connection, ValueTask flush, bool throwOnFailure) { try { await flush.ForAwait(); -#if DEBUG - RecordEndFlush(startFlush, flushBytes); -#endif connection._writeStatus = WriteStatus.Flushed; connection.UpdateLastWriteTime(); return WriteResult.Success; @@ -943,9 +936,6 @@ internal WriteResult FlushSync(bool throwOnFailure, int millisecondsTimeout) void ThrowTimeout() { -#if DEBUG - if (millisecondsTimeout > _maxFlushTime) _maxFlushTime = millisecondsTimeout; // a fair bet even if we didn't measure -#endif throw new TimeoutException("timeout while synchronously flushing"); } } @@ -956,20 +946,8 @@ internal ValueTask FlushAsync(bool throwOnFailure, CancellationToke try { _writeStatus = WriteStatus.Flushing; -#if DEBUG - int startFlush = Environment.TickCount; - long flushBytes = -1; - if (_ioPipe is SocketConnection sc) flushBytes = sc.GetCounters().BytesWaitingToBeSent; -#endif var flush = tmp.FlushAsync(cancellationToken); - if (!flush.IsCompletedSuccessfully) return FlushAsync_Awaited(this, 
flush, throwOnFailure -#if DEBUG - , startFlush, flushBytes -#endif - ); -#if DEBUG - RecordEndFlush(startFlush, flushBytes); -#endif + if (!flush.IsCompletedSuccessfully) return FlushAsync_Awaited(this, flush, throwOnFailure); _writeStatus = WriteStatus.Flushed; UpdateLastWriteTime(); return new ValueTask(WriteResult.Success); @@ -980,24 +958,8 @@ internal ValueTask FlushAsync(bool throwOnFailure, CancellationToke return new ValueTask(WriteResult.WriteFailure); } } -#if DEBUG - private void RecordEndFlush(int start, long bytes) - { - var end = Environment.TickCount; - int taken = unchecked(end - start); - if (taken > _maxFlushTime) - { - _maxFlushTime = taken; - if (bytes >= 0) _maxFlushBytes = bytes; - } - } - private volatile int _maxFlushTime = -1; - private long _maxFlushBytes = -1; - internal int MaxFlushTime => _maxFlushTime; - internal long MaxFlushBytes => _maxFlushBytes; -#endif - private static readonly ReadOnlyMemory NullBulkString = Encoding.ASCII.GetBytes("$-1\r\n"), EmptyBulkString = Encoding.ASCII.GetBytes("$0\r\n\r\n"); + private static readonly ReadOnlyMemory NullBulkString = Encoding.ASCII.GetBytes("$-1\r\n"), EmptyBulkString = Encoding.ASCII.GetBytes("$0\r\n\r\n"); private static void WriteUnifiedBlob(PipeWriter writer, byte[] value) { @@ -1676,12 +1638,9 @@ private async Task ReadFromPipe() } } - private static readonly ArenaOptions s_arenaOptions = new ArenaOptions( -#if DEBUG - blockSizeBytes: Unsafe.SizeOf() * 8 // force an absurdly small page size to trigger bugs -#endif - ); + private static readonly ArenaOptions s_arenaOptions = new ArenaOptions(); private readonly Arena _arena = new Arena(s_arenaOptions); + private int ProcessBuffer(ref ReadOnlySequence buffer) { int messageCount = 0; From c958320dbbc479f650f371a4f2a560424e5dfd26 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Wed, 19 Jan 2022 20:07:02 -0500 Subject: [PATCH 067/117] Fix revert --- src/StackExchange.Redis/Message.cs | 4 +++- src/StackExchange.Redis/PhysicalBridge.cs | 4 
++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/StackExchange.Redis/Message.cs b/src/StackExchange.Redis/Message.cs index 818003847..932cafce0 100644 --- a/src/StackExchange.Redis/Message.cs +++ b/src/StackExchange.Redis/Message.cs @@ -693,9 +693,11 @@ internal void SetPreferMaster() => internal void SetPreferReplica() => Flags = (Flags & ~MaskMasterServerPreference) | CommandFlags.PreferReplica; + /// + /// Note order here reversed to prevent overload resolution errors + /// internal void SetSource(ResultProcessor resultProcessor, IResultBox resultBox) { - // note order here reversed to prevent overload resolution errors this.resultBox = resultBox; this.resultProcessor = resultProcessor; } diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 690e49046..861db5cb0 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -804,7 +804,7 @@ internal WriteResult WriteMessageTakingWriteLockSync(PhysicalConnection physical if (!token.Success) #endif { - // If we can't get it *instantaneously*; pass it to the backlog for throughput + // If we can't get it *instantaneously*, pass it to the backlog for throughput if (TryPushToBacklog(message, onlyIfExists: false)) { return WriteResult.Success; // queued counts as success @@ -1189,7 +1189,7 @@ internal ValueTask WriteMessageTakingWriteLockAsync(PhysicalConnect if (!token.Success) #endif { - // If we can't get it *instantaneously*; pass it to the backlog for throughput + // If we can't get it *instantaneously*, pass it to the backlog for throughput if (TryPushToBacklog(message, onlyIfExists: false, bypassBacklog: bypassBacklog)) { return new ValueTask(WriteResult.Success); // queued counts as success From 2b57b0bb9447e244b400849ce80b251a607c2423 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Wed, 19 Jan 2022 20:41:06 -0500 Subject: [PATCH 068/117] Fix merge --- 
tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs b/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs index 57cea491e..432d064e0 100644 --- a/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs +++ b/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs @@ -152,7 +152,7 @@ public void NoConnectionException(bool abortOnConnect, int connCount, int comple { AbortOnConnectFail = abortOnConnect, BacklogPolicy = BacklogPolicy.FailFast, - ConnectTimeout = 500, + ConnectTimeout = 1000, SyncTimeout = 500, KeepAlive = 5000 }; From 92cecfcbbf214138445f212c47b6bae8e5c516a4 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Wed, 19 Jan 2022 23:14:43 -0500 Subject: [PATCH 069/117] WIP: This could all be a bad idea --- .../ConnectionMultiplexer.cs | 2 +- .../Interfaces/ISubscriber.cs | 8 +- src/StackExchange.Redis/PhysicalBridge.cs | 67 ----- src/StackExchange.Redis/RedisSubscriber.cs | 253 +++++++----------- 4 files changed, 109 insertions(+), 221 deletions(-) diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index 6e0d71d74..b652aff34 100644 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -1921,7 +1921,7 @@ internal async Task ReconfigureAsync(bool first, bool reconfigureAll, LogP } if (!first) { - long subscriptionChanges = ValidateSubscriptions(); + long subscriptionChanges = await EnsureSubscriptionsAsync(); if (subscriptionChanges == 0) { log?.WriteLine("No subscription changes necessary"); diff --git a/src/StackExchange.Redis/Interfaces/ISubscriber.cs b/src/StackExchange.Redis/Interfaces/ISubscriber.cs index b479a8d8d..11e985a0b 100644 --- a/src/StackExchange.Redis/Interfaces/ISubscriber.cs +++ b/src/StackExchange.Redis/Interfaces/ISubscriber.cs @@ -100,8 +100,8 @@ public interface ISubscriber : IRedis 
EndPoint SubscribedEndpoint(RedisChannel channel); /// - /// Unsubscribe from a specified message channel; note; if no handler is specified, the subscription is cancelled regardless - /// of the subscribers; if a handler is specified, the subscription is only cancelled if this handler is the + /// Unsubscribe from a specified message channel; note; if no handler is specified, the subscription is canceled regardless + /// of the subscribers; if a handler is specified, the subscription is only canceled if this handler is the /// last handler remaining against the channel /// /// The channel that was subscribed to. @@ -128,8 +128,8 @@ public interface ISubscriber : IRedis Task UnsubscribeAllAsync(CommandFlags flags = CommandFlags.None); /// - /// Unsubscribe from a specified message channel; note; if no handler is specified, the subscription is cancelled regardless - /// of the subscribers; if a handler is specified, the subscription is only cancelled if this handler is the + /// Unsubscribe from a specified message channel; note; if no handler is specified, the subscription is canceled regardless + /// of the subscribers; if a handler is specified, the subscription is only canceled if this handler is the /// last handler remaining against the channel /// /// The channel that was subscribed to. 
diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 38abeb672..51abb2996 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -8,7 +8,6 @@ using System.Threading.Channels; using System.Threading.Tasks; using static StackExchange.Redis.ConnectionMultiplexer; -using PendingSubscriptionState = global::StackExchange.Redis.ConnectionMultiplexer.Subscription.PendingSubscriptionState; #if !NETCOREAPP using Pipelines.Sockets.Unofficial.Threading; using static Pipelines.Sockets.Unofficial.Threading.MutexSlim; @@ -102,7 +101,6 @@ public enum State : byte public void Dispose() { isDisposed = true; - ShutdownSubscriptionQueue(); using (var tmp = physical) { physical = null; @@ -221,71 +219,6 @@ internal void GetCounters(ConnectionCounters counters) physical?.GetCounters(counters); } - private Channel _subscriptionBackgroundQueue; - private static readonly UnboundedChannelOptions s_subscriptionQueueOptions = new UnboundedChannelOptions - { - AllowSynchronousContinuations = false, // we do *not* want the async work to end up on the caller's thread - SingleReader = true, // only one reader will be started per channel - SingleWriter = true, // writes will be synchronized, because order matters - }; - - private Channel GetSubscriptionQueue() - { - var queue = _subscriptionBackgroundQueue; - if (queue == null) - { - queue = Channel.CreateUnbounded(s_subscriptionQueueOptions); - var existing = Interlocked.CompareExchange(ref _subscriptionBackgroundQueue, queue, null); - - if (existing != null) return existing; // we didn't win, but that's fine - - // we won (_subqueue is now queue) - // this means we have a new channel without a reader; let's fix that! 
- Task.Run(() => ExecuteSubscriptionLoop()); - } - return queue; - } - - private void ShutdownSubscriptionQueue() - { - try - { - Interlocked.CompareExchange(ref _subscriptionBackgroundQueue, null, null)?.Writer.TryComplete(); - } - catch { } - } - - private async Task ExecuteSubscriptionLoop() // pushes items that have been enqueued over the bridge - { - // note: this will execute on the default pool rather than our dedicated pool; I'm... OK with this - var queue = _subscriptionBackgroundQueue ?? Interlocked.CompareExchange(ref _subscriptionBackgroundQueue, null, null); // just to be sure we can read it! - try - { - while (await queue.Reader.WaitToReadAsync().ForAwait() && queue.Reader.TryRead(out var next)) - { - try - { - // Treat these commands as background/handshake and do not allow queueing to backlog - if ((await TryWriteAsync(next.Message, next.IsReplica).ForAwait()) != WriteResult.Success) - { - next.Abort(); - } - } - catch (Exception ex) - { - next.Fail(ex); - } - } - } - catch (Exception ex) - { - Multiplexer.OnInternalError(ex, ServerEndPoint?.EndPoint, ConnectionType); - } - } - - internal bool TryEnqueueBackgroundSubscriptionWrite(in PendingSubscriptionState state) - => !isDisposed && (_subscriptionBackgroundQueue ?? 
GetSubscriptionQueue()).Writer.TryWrite(state); - internal readonly struct BridgeStatus { /// diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs index 850a10d3b..d8b7dd3ca 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -1,5 +1,5 @@ using System; -using System.Collections.Generic; +using System.Collections.Concurrent; using System.Diagnostics; using System.Net; using System.Runtime.CompilerServices; @@ -11,15 +11,10 @@ namespace StackExchange.Redis { public partial class ConnectionMultiplexer { - private readonly Dictionary subscriptions = new Dictionary(); + private readonly SemaphoreSlim subscriptionsLock = new SemaphoreSlim(1, 1); + private readonly ConcurrentDictionary subscriptions = new ConcurrentDictionary(); - internal int GetSubscriptionsCount() - { - lock (subscriptions) - { - return subscriptions.Count; - } - } + internal int GetSubscriptionsCount() => subscriptions.Count; internal static void CompleteAsWorker(ICompletable completable) { @@ -54,12 +49,7 @@ internal static bool TryCompleteHandler(EventHandler handler, object sende internal bool GetSubscriberCounts(in RedisChannel channel, out int handlers, out int queues) { - Subscription sub; - lock (subscriptions) - { - if (!subscriptions.TryGetValue(channel, out sub)) sub = null; - } - if (sub != null) + if (subscriptions.TryGetValue(channel, out var sub)) { sub.GetSubscriberCounts(out handlers, out queues); return true; @@ -68,36 +58,27 @@ internal bool GetSubscriberCounts(in RedisChannel channel, out int handlers, out return false; } - internal Task AddSubscription(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags, object asyncState) + internal Task AddSubscriptionAsync(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags, object asyncState) { Task task = null; if (handler != null | queue != null) { - lock (subscriptions) 
+ if (!subscriptions.TryGetValue(channel, out Subscription sub)) { - if (!subscriptions.TryGetValue(channel, out Subscription sub)) - { - sub = new Subscription(); - subscriptions.Add(channel, sub); - task = sub.SubscribeToServer(this, channel, flags, asyncState, false); - } - sub.Add(handler, queue); + sub = new Subscription(flags); + subscriptions.TryAdd(channel, sub); + task = sub.SubscribeToServerAsync(this, channel, flags, asyncState, false); } + sub.Add(handler, queue); } return task ?? CompletedTask.Default(asyncState); } internal ServerEndPoint GetSubscribedServer(in RedisChannel channel) { - if (!channel.IsNullOrEmpty) + if (!channel.IsNullOrEmpty && subscriptions.TryGetValue(channel, out Subscription sub)) { - lock (subscriptions) - { - if (subscriptions.TryGetValue(channel, out Subscription sub)) - { - return sub.GetOwner(); - } - } + return sub.GetCurrentServer(); } return null; } @@ -106,12 +87,9 @@ internal void OnMessage(in RedisChannel subscription, in RedisChannel channel, i { ICompletable completable = null; ChannelMessageQueue queues = null; - lock (subscriptions) + if (subscriptions.TryGetValue(subscription, out Subscription sub)) { - if (subscriptions.TryGetValue(subscription, out Subscription sub)) - { - completable = sub.ForInvoke(channel, payload, out queues); - } + completable = sub.ForInvoke(channel, payload, out queues); } if (queues != null) ChannelMessageQueue.WriteAll(ref queues, channel, payload); if (completable != null && !completable.TryComplete(false)) ConnectionMultiplexer.CompleteAsWorker(completable); @@ -120,15 +98,13 @@ internal void OnMessage(in RedisChannel subscription, in RedisChannel channel, i internal Task RemoveAllSubscriptions(CommandFlags flags, object asyncState) { Task last = null; - lock (subscriptions) + foreach (var pair in subscriptions) { - foreach (var pair in subscriptions) + if (subscriptions.TryRemove(pair.Key, out var sub)) { pair.Value.MarkCompleted(); - var task = 
pair.Value.UnsubscribeFromServer(pair.Key, flags, asyncState, false); - if (task != null) last = task; + last = pair.Value.UnsubscribeFromServerAsync(pair.Key, asyncState, false); } - subscriptions.Clear(); } return last ?? CompletedTask.Default(asyncState); } @@ -136,25 +112,23 @@ internal Task RemoveAllSubscriptions(CommandFlags flags, object asyncState) internal Task RemoveSubscription(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags, object asyncState) { Task task = null; - lock (subscriptions) + if (subscriptions.TryGetValue(channel, out Subscription sub)) { - if (subscriptions.TryGetValue(channel, out Subscription sub)) + bool removeChannel; + if (handler == null & queue == null) // blanket wipe { - bool remove; - if (handler == null & queue == null) // blanket wipe - { - sub.MarkCompleted(); - remove = true; - } - else - { - remove = sub.Remove(handler, queue); - } - if (remove) - { - subscriptions.Remove(channel); - task = sub.UnsubscribeFromServer(channel, flags, asyncState, false); - } + sub.MarkCompleted(); + removeChannel = true; + } + else + { + removeChannel = sub.Remove(handler, queue); + } + // If it was the last handler or a blanket wipe, remove it. + if (removeChannel) + { + subscriptions.TryRemove(channel, out _); + task = sub.UnsubscribeFromServerAsync(channel, asyncState, false); } } return task ?? 
CompletedTask.Default(asyncState); @@ -163,12 +137,9 @@ internal Task RemoveSubscription(in RedisChannel channel, Action EnsureSubscriptionsAsync() { - lock (subscriptions) + long count = 0; + foreach (var pair in subscriptions) { - long count = 0; - foreach (var pair in subscriptions) + if (await pair.Value.EnsureSubscribedAsync(this, pair.Key)) { - if (pair.Value.Validate(this, pair.Key)) count++; + count++; } - return count; } + return count; } internal sealed class Subscription { private Action _handlers; private ChannelMessageQueue _queues; - private ServerEndPoint owner; + private ServerEndPoint CurrentServer; + public CommandFlags Flags { get; } + + public Subscription(CommandFlags flags) + { + Flags = flags; + } + + private Message GetMessage( + RedisChannel channel, + RedisCommand command, + object asyncState, + bool internalCall, + out TaskCompletionSource taskSource) + { + var msg = Message.Create(-1, Flags, command, channel); + if (internalCall) msg.SetInternalCall(); + + var source = TaskResultBox.Create(out taskSource, asyncState); + msg.SetSource(ResultProcessor.TrackSubscriptions, source); + return msg; + } public void Add(Action handler, ChannelMessageQueue queue) { @@ -225,90 +217,58 @@ public bool Remove(Action handler, ChannelMessageQueue return _handlers == null & _queues == null; } - [System.Diagnostics.CodeAnalysis.SuppressMessage("Usage", "RCS1210:Return completed task instead of returning null.", Justification = "Intentional for efficient success check")] - public Task SubscribeToServer(ConnectionMultiplexer multiplexer, in RedisChannel channel, CommandFlags flags, object asyncState, bool internalCall) + public async Task SubscribeToServerAsync(ConnectionMultiplexer multiplexer, RedisChannel channel, CommandFlags flags, object asyncState, bool internalCall) { + var command = channel.IsPatternBased ? 
RedisCommand.PSUBSCRIBE : RedisCommand.SUBSCRIBE; // TODO: default(RedisKey) is incorrect here - should shard based on the channel in cluster - var selected = multiplexer.SelectServer(RedisCommand.SUBSCRIBE, flags, default(RedisKey)); - var bridge = selected?.GetBridge(ConnectionType.Subscription, true); - if (bridge == null) return null; - - // note: check we can create the message validly *before* we swap the owner over (Interlocked) - var state = PendingSubscriptionState.Create(channel, this, flags, true, internalCall, asyncState, selected.IsReplica); + var selected = multiplexer.SelectServer(command, flags, default(RedisKey)); - if (Interlocked.CompareExchange(ref owner, selected, null) != null) return null; + if (Interlocked.CompareExchange(ref CurrentServer, selected, null) != null) + { + // Abort + return false; + } try { - if (!bridge.TryEnqueueBackgroundSubscriptionWrite(state)) + var message = GetMessage(channel, command, asyncState, internalCall, out var taskSource); + var success = await multiplexer.ExecuteAsyncImpl(message, ResultProcessor.TrackSubscriptions, null, selected); + if (!success) { - state.Abort(); - return null; + taskSource.SetCanceled(); } - return state.Task; + return await taskSource.Task; } catch { // clear the owner if it is still us - Interlocked.CompareExchange(ref owner, null, selected); + Interlocked.CompareExchange(ref CurrentServer, null, selected); throw; } } - [System.Diagnostics.CodeAnalysis.SuppressMessage("Usage", "RCS1210:Return completed task instead of returning null.", Justification = "Intentional for efficient success check")] - public Task UnsubscribeFromServer(in RedisChannel channel, CommandFlags flags, object asyncState, bool internalCall) - { - var oldOwner = Interlocked.Exchange(ref owner, null); - var bridge = oldOwner?.GetBridge(ConnectionType.Subscription, false); - if (bridge == null) return null; - - var state = PendingSubscriptionState.Create(channel, this, flags, false, internalCall, asyncState, 
oldOwner.IsReplica); - - if (!bridge.TryEnqueueBackgroundSubscriptionWrite(state)) - { - state.Abort(); - return null; - } - return state.Task; - } - - internal readonly struct PendingSubscriptionState + public async Task UnsubscribeFromServerAsync(RedisChannel channel, object asyncState, bool internalCall) { - public override string ToString() => Message.ToString(); - public Subscription Subscription { get; } - public Message Message { get; } - public bool IsReplica { get; } - public Task Task => _taskSource.Task; - private readonly TaskCompletionSource _taskSource; - - public static PendingSubscriptionState Create(RedisChannel channel, Subscription subscription, CommandFlags flags, bool subscribe, bool internalCall, object asyncState, bool isReplica) - => new PendingSubscriptionState(asyncState, channel, subscription, flags, subscribe, internalCall, isReplica); - - public void Abort() => _taskSource.TrySetCanceled(); - public void Fail(Exception ex) => _taskSource.TrySetException(ex); - - private PendingSubscriptionState(object asyncState, RedisChannel channel, Subscription subscription, CommandFlags flags, bool subscribe, bool internalCall, bool isReplica) + var command = channel.IsPatternBased ? RedisCommand.PUNSUBSCRIBE : RedisCommand.UNSUBSCRIBE; + var oldOwner = Interlocked.Exchange(ref CurrentServer, null); + if (oldOwner != null) { - var cmd = subscribe - ? (channel.IsPatternBased ? RedisCommand.PSUBSCRIBE : RedisCommand.SUBSCRIBE) - : (channel.IsPatternBased ? 
RedisCommand.PUNSUBSCRIBE : RedisCommand.UNSUBSCRIBE); - var msg = Message.Create(-1, flags, cmd, channel); - if (internalCall) msg.SetInternalCall(); - - var source = TaskResultBox.Create(out _taskSource, asyncState); - msg.SetSource(ResultProcessor.TrackSubscriptions, source); - - Subscription = subscription; - Message = msg; - IsReplica = isReplica; + var message = GetMessage(channel, command, asyncState, internalCall, out var taskSource); + var success = await oldOwner.Multiplexer.ExecuteAsyncImpl(message, ResultProcessor.TrackSubscriptions, null, oldOwner); + if (!success) + { + taskSource.SetCanceled(); + } + return await taskSource.Task; } + return false; } - internal ServerEndPoint GetOwner() => Volatile.Read(ref owner); + internal ServerEndPoint GetCurrentServer() => Volatile.Read(ref CurrentServer); internal void Resubscribe(in RedisChannel channel, ServerEndPoint server) { // Only re-subscribe to the original server - if (server != null && GetOwner() == server) + if (server != null && GetCurrentServer() == server) { var cmd = channel.IsPatternBased ? 
RedisCommand.PSUBSCRIBE : RedisCommand.SUBSCRIBE; var msg = Message.Create(-1, CommandFlags.FireAndForget, cmd, channel); @@ -317,21 +277,20 @@ internal void Resubscribe(in RedisChannel channel, ServerEndPoint server) } } - internal bool Validate(ConnectionMultiplexer multiplexer, in RedisChannel channel) + internal async ValueTask EnsureSubscribedAsync(ConnectionMultiplexer multiplexer, RedisChannel channel) { bool changed = false; - var oldOwner = Volatile.Read(ref owner); + var oldOwner = Volatile.Read(ref CurrentServer); + // If the old server is bad, unsubscribe if (oldOwner != null && !oldOwner.IsSelectable(RedisCommand.PSUBSCRIBE)) { - if (UnsubscribeFromServer(channel, CommandFlags.FireAndForget, null, true) != null) - { - changed = true; - } + changed = await UnsubscribeFromServerAsync(channel, null, true); oldOwner = null; } - if (oldOwner == null && SubscribeToServer(multiplexer, channel, CommandFlags.FireAndForget, null, true) != null) + // If we didn't have an owner or just cleared one, subscribe + if (oldOwner == null) { - changed = true; + changed = await SubscribeToServerAsync(multiplexer, channel, CommandFlags.FireAndForget, null, true); } return changed; } @@ -422,10 +381,7 @@ public Task IdentifyEndpointAsync(RedisChannel channel, CommandFlags f return ExecuteAsync(msg, ResultProcessor.ConnectionIdentity); } - public bool IsConnected(RedisChannel channel = default(RedisChannel)) - { - return multiplexer.SubscriberConnected(channel); - } + public bool IsConnected(RedisChannel channel = default(RedisChannel)) => multiplexer.SubscriberConnected(channel); public override TimeSpan Ping(CommandFlags flags = CommandFlags.None) { @@ -459,7 +415,7 @@ private Message CreatePingMessage(CommandFlags flags) RedisValue channel = multiplexer.UniqueId; msg = ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.UNSUBSCRIBE, channel); } - // Ensure the ping is sent over the intended subscriver connection, which wouldn't happen in GetBridge() by 
default with PING; + // Ensure the ping is sent over the intended subscriber connection, which wouldn't happen in GetBridge() by default with PING; msg.SetForSubscriptionBridge(); return msg; } @@ -500,7 +456,7 @@ Task ISubscriber.SubscribeAsync(RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) { if (channel.IsNullOrEmpty) throw new ArgumentNullException(nameof(channel)); - return multiplexer.AddSubscription(channel, handler, queue, flags, asyncState); + return multiplexer.AddSubscriptionAsync(channel, handler, queue, flags, asyncState); } internal bool GetSubscriberCounts(in RedisChannel channel, out int handlers, out int queues) @@ -521,6 +477,7 @@ public EndPoint SubscribedEndpoint(RedisChannel channel) void ISubscriber.Unsubscribe(RedisChannel channel, Action handler, CommandFlags flags) => Unsubscribe(channel, handler, null, flags); + public void Unsubscribe(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) { var task = UnsubscribeAsync(channel, handler, queue, flags); @@ -533,10 +490,8 @@ public void UnsubscribeAll(CommandFlags flags = CommandFlags.None) if ((flags & CommandFlags.FireAndForget) == 0) Wait(task); } - public Task UnsubscribeAllAsync(CommandFlags flags = CommandFlags.None) - { - return multiplexer.RemoveAllSubscriptions(flags, asyncState); - } + public Task UnsubscribeAllAsync(CommandFlags flags = CommandFlags.None) => + multiplexer.RemoveAllSubscriptions(flags, asyncState); Task ISubscriber.UnsubscribeAsync(RedisChannel channel, Action handler, CommandFlags flags) => UnsubscribeAsync(channel, handler, null, flags); From 7d7f0207edb36c9222f5bacbb9cd342f76c908fc Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Thu, 20 Jan 2022 10:53:53 -0500 Subject: [PATCH 070/117] Gap commit Want to yank some of this into another PR ahead of time, getting files in. 
--- .../ConnectionMultiplexer.Threading.cs | 50 +++++ .../ConnectionMultiplexer.Verbose.cs | 59 ++++++ .../ConnectionMultiplexer.cs | 43 +--- src/StackExchange.Redis/RedisSubscriber.cs | 190 +++++++----------- .../ServerSelectionStrategy.cs | 25 ++- 5 files changed, 205 insertions(+), 162 deletions(-) create mode 100644 src/StackExchange.Redis/ConnectionMultiplexer.Threading.cs create mode 100644 src/StackExchange.Redis/ConnectionMultiplexer.Verbose.cs diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.Threading.cs b/src/StackExchange.Redis/ConnectionMultiplexer.Threading.cs new file mode 100644 index 000000000..f23d010cc --- /dev/null +++ b/src/StackExchange.Redis/ConnectionMultiplexer.Threading.cs @@ -0,0 +1,50 @@ +using System; +using System.Threading; +using Pipelines.Sockets.Unofficial; + +namespace StackExchange.Redis +{ + public partial class ConnectionMultiplexer + { + private static readonly WaitCallback s_CompleteAsWorker = s => ((ICompletable)s).TryComplete(true); + internal static void CompleteAsWorker(ICompletable completable) + { + if (completable != null) + { + ThreadPool.QueueUserWorkItem(s_CompleteAsWorker, completable); + } + } + + internal static bool TryCompleteHandler(EventHandler handler, object sender, T args, bool isAsync) where T : EventArgs, ICompletable + { + if (handler == null) return true; + if (isAsync) + { + if (handler.IsSingle()) + { + try + { + handler(sender, args); + } + catch { } + } + else + { + foreach (EventHandler sub in handler.AsEnumerable()) + { + try + { + sub(sender, args); + } + catch { } + } + } + return true; + } + else + { + return false; + } + } + } +} diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.Verbose.cs b/src/StackExchange.Redis/ConnectionMultiplexer.Verbose.cs new file mode 100644 index 000000000..7a61096b6 --- /dev/null +++ b/src/StackExchange.Redis/ConnectionMultiplexer.Verbose.cs @@ -0,0 +1,59 @@ +using System; +using System.Diagnostics; +using System.Net; +using 
System.Runtime.CompilerServices; + +namespace StackExchange.Redis +{ + public partial class ConnectionMultiplexer + { + internal event Action MessageFaulted; + internal event Action Closing; + internal event Action PreTransactionExec, TransactionLog, InfoMessage; + internal event Action Connecting; + internal event Action Resurrecting; + + partial void OnTrace(string message, string category); + static partial void OnTraceWithoutContext(string message, string category); + + [Conditional("VERBOSE")] + internal void Trace(string message, [CallerMemberName] string category = null) => OnTrace(message, category); + + [Conditional("VERBOSE")] + internal void Trace(bool condition, string message, [CallerMemberName] string category = null) + { + if (condition) OnTrace(message, category); + } + + [Conditional("VERBOSE")] + internal static void TraceWithoutContext(string message, [CallerMemberName] string category = null) => OnTraceWithoutContext(message, category); + + [Conditional("VERBOSE")] + internal static void TraceWithoutContext(bool condition, string message, [CallerMemberName] string category = null) + { + if (condition) OnTraceWithoutContext(message, category); + } + + [Conditional("VERBOSE")] + internal void OnMessageFaulted(Message msg, Exception fault, [CallerMemberName] string origin = default, [CallerFilePath] string path = default, [CallerLineNumber] int lineNumber = default) => + MessageFaulted?.Invoke(msg?.CommandAndKey, fault, $"{origin} ({path}#{lineNumber})"); + + [Conditional("VERBOSE")] + internal void OnInfoMessage(string message) => InfoMessage?.Invoke(message); + + [Conditional("VERBOSE")] + internal void OnClosing(bool complete) => Closing?.Invoke(complete); + + [Conditional("VERBOSE")] + internal void OnConnecting(EndPoint endpoint, ConnectionType connectionType) => Connecting?.Invoke(endpoint, connectionType); + + [Conditional("VERBOSE")] + internal void OnResurrecting(EndPoint endpoint, ConnectionType connectionType) => 
Resurrecting.Invoke(endpoint, connectionType); + + [Conditional("VERBOSE")] + internal void OnPreTransactionExec(Message message) => PreTransactionExec?.Invoke(message.CommandAndKey); + + [Conditional("VERBOSE")] + internal void OnTransactionLog(string message) => TransactionLog?.Invoke(message); + } +} diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index b652aff34..2254cf50a 100644 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -1504,33 +1504,6 @@ public IServer GetServer(EndPoint endpoint, object asyncState = null) return new RedisServer(this, server, asyncState); } - [Conditional("VERBOSE")] - internal void Trace(string message, [CallerMemberName] string category = null) - { - OnTrace(message, category); - } - - [Conditional("VERBOSE")] - internal void Trace(bool condition, string message, [CallerMemberName] string category = null) - { - if (condition) OnTrace(message, category); - } - - partial void OnTrace(string message, string category); - static partial void OnTraceWithoutContext(string message, string category); - - [Conditional("VERBOSE")] - internal static void TraceWithoutContext(string message, [CallerMemberName] string category = null) - { - OnTraceWithoutContext(message, category); - } - - [Conditional("VERBOSE")] - internal static void TraceWithoutContext(bool condition, string message, [CallerMemberName] string category = null) - { - if (condition) OnTraceWithoutContext(message, category); - } - /// /// The number of operations that have been performed on all connections /// @@ -2174,16 +2147,14 @@ internal void UpdateClusterRange(ClusterConfiguration configuration) private IDisposable pulse; - internal ServerEndPoint SelectServer(Message message) - { - if (message == null) return null; - return ServerSelectionStrategy.Select(message); - } + internal ServerEndPoint SelectServer(Message message) => + message == null ? 
null : ServerSelectionStrategy.Select(message); - internal ServerEndPoint SelectServer(RedisCommand command, CommandFlags flags, in RedisKey key) - { - return ServerSelectionStrategy.Select(command, key, flags); - } + internal ServerEndPoint SelectServer(RedisCommand command, CommandFlags flags, in RedisKey key) => + ServerSelectionStrategy.Select(command, key, flags); + + internal ServerEndPoint SelectServer(RedisCommand command, CommandFlags flags, in RedisChannel channel) => + ServerSelectionStrategy.Select(command, channel, flags); private bool PrepareToPushMessageToBridge(Message message, ResultProcessor processor, IResultBox resultBox, ref ServerEndPoint server) { diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs index d8b7dd3ca..6ded6952f 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -1,8 +1,6 @@ using System; using System.Collections.Concurrent; -using System.Diagnostics; using System.Net; -using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; using Pipelines.Sockets.Unofficial; @@ -11,42 +9,11 @@ namespace StackExchange.Redis { public partial class ConnectionMultiplexer { - private readonly SemaphoreSlim subscriptionsLock = new SemaphoreSlim(1, 1); - private readonly ConcurrentDictionary subscriptions = new ConcurrentDictionary(); + private readonly SemaphoreSlim subscriptionsAddLock = new SemaphoreSlim(1, 1); + private readonly ConcurrentDictionary subscriptions = new(); internal int GetSubscriptionsCount() => subscriptions.Count; - internal static void CompleteAsWorker(ICompletable completable) - { - if (completable != null) ThreadPool.QueueUserWorkItem(s_CompleteAsWorker, completable); - } - - private static readonly WaitCallback s_CompleteAsWorker = s => ((ICompletable)s).TryComplete(true); - - internal static bool TryCompleteHandler(EventHandler handler, object sender, T args, bool isAsync) where T : 
EventArgs, ICompletable - { - if (handler == null) return true; - if (isAsync) - { - if (handler.IsSingle()) - { - try { handler(sender, args); } catch { } - } - else - { - foreach (EventHandler sub in handler.AsEnumerable()) - { - try { sub(sender, args); } catch { } - } - } - return true; - } - else - { - return false; - } - } - internal bool GetSubscriberCounts(in RedisChannel channel, out int handlers, out int queues) { if (subscriptions.TryGetValue(channel, out var sub)) @@ -58,20 +25,22 @@ internal bool GetSubscriberCounts(in RedisChannel channel, out int handlers, out return false; } - internal Task AddSubscriptionAsync(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags, object asyncState) + internal async Task AddSubscriptionAsync(RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags, object asyncState) { - Task task = null; if (handler != null | queue != null) { if (!subscriptions.TryGetValue(channel, out Subscription sub)) { sub = new Subscription(flags); subscriptions.TryAdd(channel, sub); - task = sub.SubscribeToServerAsync(this, channel, flags, asyncState, false); + if (!(await sub.SubscribeToServerAsync(this, channel, flags, asyncState, false))) + { + return false; + } } sub.Add(handler, queue); } - return task ?? 
CompletedTask.Default(asyncState); + return true; } internal ServerEndPoint GetSubscribedServer(in RedisChannel channel) @@ -91,11 +60,17 @@ internal void OnMessage(in RedisChannel subscription, in RedisChannel channel, i { completable = sub.ForInvoke(channel, payload, out queues); } - if (queues != null) ChannelMessageQueue.WriteAll(ref queues, channel, payload); - if (completable != null && !completable.TryComplete(false)) ConnectionMultiplexer.CompleteAsWorker(completable); + if (queues != null) + { + ChannelMessageQueue.WriteAll(ref queues, channel, payload); + } + if (completable != null && !completable.TryComplete(false)) + { + CompleteAsWorker(completable); + } } - internal Task RemoveAllSubscriptions(CommandFlags flags, object asyncState) + internal Task RemoveAllSubscriptionsAsync(CommandFlags flags, object asyncState) { Task last = null; foreach (var pair in subscriptions) @@ -109,7 +84,7 @@ internal Task RemoveAllSubscriptions(CommandFlags flags, object asyncState) return last ?? CompletedTask.Default(asyncState); } - internal Task RemoveSubscription(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags, object asyncState) + internal Task RemoveSubscriptionAsync(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags, object asyncState) { Task task = null; if (subscriptions.TryGetValue(channel, out Subscription sub)) @@ -145,9 +120,7 @@ internal void ResendSubscriptions(ServerEndPoint server) internal bool SubscriberConnected(in RedisChannel channel = default(RedisChannel)) { - // TODO: default(RedisKey) is incorrect here - should shard based on the channel in cluster - var server = GetSubscribedServer(channel) ?? SelectServer(RedisCommand.SUBSCRIBE, CommandFlags.DemandMaster, default(RedisKey)); - + var server = GetSubscribedServer(channel) ?? 
SelectServer(RedisCommand.SUBSCRIBE, CommandFlags.DemandMaster, channel); return server?.IsConnected == true && server.IsSubscriberConnected; } @@ -171,10 +144,7 @@ internal sealed class Subscription private ServerEndPoint CurrentServer; public CommandFlags Flags { get; } - public Subscription(CommandFlags flags) - { - Flags = flags; - } + public Subscription(CommandFlags flags) => Flags = flags; private Message GetMessage( RedisChannel channel, @@ -184,7 +154,11 @@ private Message GetMessage( out TaskCompletionSource taskSource) { var msg = Message.Create(-1, Flags, command, channel); - if (internalCall) msg.SetInternalCall(); + msg.SetForSubscriptionBridge(); + if (internalCall) + { + msg.SetInternalCall(); + } var source = TaskResultBox.Create(out taskSource, asyncState); msg.SetSource(ResultProcessor.TrackSubscriptions, source); @@ -193,8 +167,14 @@ private Message GetMessage( public void Add(Action handler, ChannelMessageQueue queue) { - if (handler != null) _handlers += handler; - if (queue != null) ChannelMessageQueue.Combine(ref _queues, queue); + if (handler != null) + { + _handlers += handler; + } + if (queue != null) + { + ChannelMessageQueue.Combine(ref _queues, queue); + } } public ICompletable ForInvoke(in RedisChannel channel, in RedisValue message, out ChannelMessageQueue queues) @@ -212,35 +192,42 @@ internal void MarkCompleted() public bool Remove(Action handler, ChannelMessageQueue queue) { - if (handler != null) _handlers -= handler; - if (queue != null) ChannelMessageQueue.Remove(ref _queues, queue); + if (handler != null) + { + _handlers -= handler; + } + if (queue != null) + { + ChannelMessageQueue.Remove(ref _queues, queue); + } return _handlers == null & _queues == null; } public async Task SubscribeToServerAsync(ConnectionMultiplexer multiplexer, RedisChannel channel, CommandFlags flags, object asyncState, bool internalCall) { var command = channel.IsPatternBased ? 
RedisCommand.PSUBSCRIBE : RedisCommand.SUBSCRIBE; - // TODO: default(RedisKey) is incorrect here - should shard based on the channel in cluster - var selected = multiplexer.SelectServer(command, flags, default(RedisKey)); + var selected = multiplexer.SelectServer(command, flags, channel); - if (Interlocked.CompareExchange(ref CurrentServer, selected, null) != null) + // Do we have a server already? And is it connected? Then bail out. + if (CurrentServer?.IsSubscriberConnected == true) { - // Abort return false; } + // Otherwise try and subscribe on the server side try { var message = GetMessage(channel, command, asyncState, internalCall, out var taskSource); - var success = await multiplexer.ExecuteAsyncImpl(message, ResultProcessor.TrackSubscriptions, null, selected); + // TODO: Could move this entirely into a processor, e.g. the CurrentServer removal we need below + var success = await multiplexer.ExecuteAsyncImpl(message, ResultProcessor.TrackSubscriptions, asyncState, selected); if (!success) { - taskSource.SetCanceled(); + taskSource.SetResult(false); } return await taskSource.Task; } catch { - // clear the owner if it is still us + // If there was an exception, clear the owner Interlocked.CompareExchange(ref CurrentServer, null, selected); throw; } @@ -253,7 +240,7 @@ public async Task UnsubscribeFromServerAsync(RedisChannel channel, object if (oldOwner != null) { var message = GetMessage(channel, command, asyncState, internalCall, out var taskSource); - var success = await oldOwner.Multiplexer.ExecuteAsyncImpl(message, ResultProcessor.TrackSubscriptions, null, oldOwner); + var success = await oldOwner.Multiplexer.ExecuteAsyncImpl(message, ResultProcessor.TrackSubscriptions, asyncState, oldOwner); if (!success) { taskSource.SetCanceled(); @@ -314,51 +301,6 @@ internal void GetSubscriberCounts(out int handlers, out int queues) } } } - - internal string GetConnectionName(EndPoint endPoint, ConnectionType connectionType) - => 
GetServerEndPoint(endPoint)?.GetBridge(connectionType, false)?.PhysicalName; - - internal event Action MessageFaulted; - internal event Action Closing; - internal event Action PreTransactionExec, TransactionLog, InfoMessage; - internal event Action Connecting; - internal event Action Resurrecting; - - [Conditional("VERBOSE")] - internal void OnMessageFaulted(Message msg, Exception fault, [CallerMemberName] string origin = default, [CallerFilePath] string path = default, [CallerLineNumber] int lineNumber = default) - { - MessageFaulted?.Invoke(msg?.CommandAndKey, fault, $"{origin} ({path}#{lineNumber})"); - } - [Conditional("VERBOSE")] - internal void OnInfoMessage(string message) - { - InfoMessage?.Invoke(message); - } - [Conditional("VERBOSE")] - internal void OnClosing(bool complete) - { - Closing?.Invoke(complete); - } - [Conditional("VERBOSE")] - internal void OnConnecting(EndPoint endpoint, ConnectionType connectionType) - { - Connecting?.Invoke(endpoint, connectionType); - } - [Conditional("VERBOSE")] - internal void OnResurrecting(EndPoint endpoint, ConnectionType connectionType) - { - Resurrecting.Invoke(endpoint, connectionType); - } - [Conditional("VERBOSE")] - internal void OnPreTransactionExec(Message message) - { - PreTransactionExec?.Invoke(message.CommandAndKey); - } - [Conditional("VERBOSE")] - internal void OnTransactionLog(string message) - { - TransactionLog?.Invoke(message); - } } internal sealed class RedisSubscriber : RedisBase, ISubscriber @@ -422,14 +364,20 @@ private Message CreatePingMessage(CommandFlags flags) public long Publish(RedisChannel channel, RedisValue message, CommandFlags flags = CommandFlags.None) { - if (channel.IsNullOrEmpty) throw new ArgumentNullException(nameof(channel)); + if (channel.IsNullOrEmpty) + { + throw new ArgumentNullException(nameof(channel)); + } var msg = Message.Create(-1, flags, RedisCommand.PUBLISH, channel, message); return ExecuteSync(msg, ResultProcessor.Int64); } public Task PublishAsync(RedisChannel 
channel, RedisValue message, CommandFlags flags = CommandFlags.None) { - if (channel.IsNullOrEmpty) throw new ArgumentNullException(nameof(channel)); + if (channel.IsNullOrEmpty) + { + throw new ArgumentNullException(nameof(channel)); + } var msg = Message.Create(-1, flags, RedisCommand.PUBLISH, channel, message); return ExecuteAsync(msg, ResultProcessor.Int64); } @@ -455,7 +403,10 @@ Task ISubscriber.SubscribeAsync(RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) { - if (channel.IsNullOrEmpty) throw new ArgumentNullException(nameof(channel)); + if (channel.IsNullOrEmpty) + { + throw new ArgumentNullException(nameof(channel)); + } return multiplexer.AddSubscriptionAsync(channel, handler, queue, flags, asyncState); } @@ -469,11 +420,7 @@ public async Task SubscribeAsync(RedisChannel channel, Comm return queue; } - public EndPoint SubscribedEndpoint(RedisChannel channel) - { - var server = multiplexer.GetSubscribedServer(channel); - return server?.EndPoint; - } + public EndPoint SubscribedEndpoint(RedisChannel channel) => multiplexer.GetSubscribedServer(channel)?.EndPoint; void ISubscriber.Unsubscribe(RedisChannel channel, Action handler, CommandFlags flags) => Unsubscribe(channel, handler, null, flags); @@ -490,15 +437,18 @@ public void UnsubscribeAll(CommandFlags flags = CommandFlags.None) if ((flags & CommandFlags.FireAndForget) == 0) Wait(task); } - public Task UnsubscribeAllAsync(CommandFlags flags = CommandFlags.None) => - multiplexer.RemoveAllSubscriptions(flags, asyncState); + public Task UnsubscribeAllAsync(CommandFlags flags = CommandFlags.None) => multiplexer.RemoveAllSubscriptionsAsync(flags, asyncState); Task ISubscriber.UnsubscribeAsync(RedisChannel channel, Action handler, CommandFlags flags) => UnsubscribeAsync(channel, handler, null, flags); + public Task UnsubscribeAsync(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) { - if (channel.IsNullOrEmpty) throw new 
ArgumentNullException(nameof(channel)); - return multiplexer.RemoveSubscription(channel, handler, queue, flags, asyncState); + if (channel.IsNullOrEmpty) + { + throw new ArgumentNullException(nameof(channel)); + } + return multiplexer.RemoveSubscriptionAsync(channel, handler, queue, flags, asyncState); } } } diff --git a/src/StackExchange.Redis/ServerSelectionStrategy.cs b/src/StackExchange.Redis/ServerSelectionStrategy.cs index ac9e664ca..cb4bb954c 100644 --- a/src/StackExchange.Redis/ServerSelectionStrategy.cs +++ b/src/StackExchange.Redis/ServerSelectionStrategy.cs @@ -58,19 +58,26 @@ public ServerSelectionStrategy(ConnectionMultiplexer multiplexer) internal static int TotalSlots => RedisClusterSlotCount; /// - /// Computes the hash-slot that would be used by the given key + /// Computes the hash-slot that would be used by the given key. /// /// The to determine a slot ID for. public int HashSlot(in RedisKey key) - => ServerType == ServerType.Standalone ? NoSlot : GetClusterSlot(key); + => ServerType == ServerType.Standalone || key.IsNull ? NoSlot : GetClusterSlot((byte[])key); - private static unsafe int GetClusterSlot(in RedisKey key) + /// + /// Computes the hash-slot that would be used by the given channel. + /// + /// The to determine a slot ID for. + public int HashSlot(in RedisChannel channel) + => ServerType == ServerType.Standalone || channel.IsNull ? 
NoSlot : GetClusterSlot((byte[])channel); + + /// + /// HASH_SLOT = CRC16(key) mod 16384 + /// + private static unsafe int GetClusterSlot(byte[] blob) { - //HASH_SLOT = CRC16(key) mod 16384 - if (key.IsNull) return NoSlot; unchecked { - var blob = (byte[])key; fixed (byte* ptr = blob) { fixed (ushort* crc16tab = s_crc16tab) @@ -116,6 +123,12 @@ public ServerEndPoint Select(RedisCommand command, in RedisKey key, CommandFlags return Select(slot, command, flags); } + public ServerEndPoint Select(RedisCommand command, in RedisChannel channel, CommandFlags flags) + { + int slot = ServerType == ServerType.Cluster ? HashSlot(channel) : NoSlot; + return Select(slot, command, flags); + } + public bool TryResend(int hashSlot, Message message, EndPoint endpoint, bool isMoved) { try From f24798094c4093bf155329ac80388233731fff84 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Thu, 20 Jan 2022 11:06:31 -0500 Subject: [PATCH 071/117] Pub/Sub: default to 3.0, fix PING, fix server selection in cluster, and cleanup In prep for changes to how we handle subscriptions internally, this does several things: - Upgrades default Redis server assumption to 3.x - Routes PING on Subscription keepalives over the subscription bridge appropriately - Fixes cluster sharding from default(RedisKey) to shared logic for RedisChannel as well (both in byte[] form) - General code cleanup in the area (getting a lot of DEBUG/VERBOSE noise into isolated files) --- .../ConfigurationOptions.cs | 2 +- .../ConnectionMultiplexer.Threading.cs | 50 +++++++++ .../ConnectionMultiplexer.Verbose.cs | 59 ++++++++++ .../ConnectionMultiplexer.cs | 45 ++------ src/StackExchange.Redis/Enums/CommandFlags.cs | 2 + src/StackExchange.Redis/ExceptionFactory.cs | 4 +- .../Interfaces/ISubscriber.cs | 8 +- src/StackExchange.Redis/Message.cs | 12 +- src/StackExchange.Redis/PhysicalBridge.cs | 1 + src/StackExchange.Redis/RedisBatch.cs | 2 +- src/StackExchange.Redis/RedisSubscriber.cs | 82 +------------- 
src/StackExchange.Redis/ServerEndPoint.cs | 56 ++++++--- .../ServerSelectionStrategy.cs | 25 ++++- tests/StackExchange.Redis.Tests/AsyncTests.cs | 2 +- tests/StackExchange.Redis.Tests/Config.cs | 2 +- .../ConnectFailTimeout.cs | 2 +- .../ConnectingFailDetection.cs | 6 +- .../ConnectionShutdown.cs | 2 +- tests/StackExchange.Redis.Tests/PubSub.cs | 106 +++++++++++++++--- tests/StackExchange.Redis.Tests/TestBase.cs | 5 + 20 files changed, 308 insertions(+), 165 deletions(-) create mode 100644 src/StackExchange.Redis/ConnectionMultiplexer.Threading.cs create mode 100644 src/StackExchange.Redis/ConnectionMultiplexer.Verbose.cs diff --git a/src/StackExchange.Redis/ConfigurationOptions.cs b/src/StackExchange.Redis/ConfigurationOptions.cs index d733353cc..138096120 100644 --- a/src/StackExchange.Redis/ConfigurationOptions.cs +++ b/src/StackExchange.Redis/ConfigurationOptions.cs @@ -306,7 +306,7 @@ public int ConnectTimeout /// public Version DefaultVersion { - get => defaultVersion ?? (IsAzureEndpoint() ? RedisFeatures.v4_0_0 : RedisFeatures.v2_8_0); + get => defaultVersion ?? (IsAzureEndpoint() ? 
RedisFeatures.v4_0_0 : RedisFeatures.v3_0_0); set => defaultVersion = value; } diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.Threading.cs b/src/StackExchange.Redis/ConnectionMultiplexer.Threading.cs new file mode 100644 index 000000000..f23d010cc --- /dev/null +++ b/src/StackExchange.Redis/ConnectionMultiplexer.Threading.cs @@ -0,0 +1,50 @@ +using System; +using System.Threading; +using Pipelines.Sockets.Unofficial; + +namespace StackExchange.Redis +{ + public partial class ConnectionMultiplexer + { + private static readonly WaitCallback s_CompleteAsWorker = s => ((ICompletable)s).TryComplete(true); + internal static void CompleteAsWorker(ICompletable completable) + { + if (completable != null) + { + ThreadPool.QueueUserWorkItem(s_CompleteAsWorker, completable); + } + } + + internal static bool TryCompleteHandler(EventHandler handler, object sender, T args, bool isAsync) where T : EventArgs, ICompletable + { + if (handler == null) return true; + if (isAsync) + { + if (handler.IsSingle()) + { + try + { + handler(sender, args); + } + catch { } + } + else + { + foreach (EventHandler sub in handler.AsEnumerable()) + { + try + { + sub(sender, args); + } + catch { } + } + } + return true; + } + else + { + return false; + } + } + } +} diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.Verbose.cs b/src/StackExchange.Redis/ConnectionMultiplexer.Verbose.cs new file mode 100644 index 000000000..7a61096b6 --- /dev/null +++ b/src/StackExchange.Redis/ConnectionMultiplexer.Verbose.cs @@ -0,0 +1,59 @@ +using System; +using System.Diagnostics; +using System.Net; +using System.Runtime.CompilerServices; + +namespace StackExchange.Redis +{ + public partial class ConnectionMultiplexer + { + internal event Action MessageFaulted; + internal event Action Closing; + internal event Action PreTransactionExec, TransactionLog, InfoMessage; + internal event Action Connecting; + internal event Action Resurrecting; + + partial void OnTrace(string message, string category); + 
static partial void OnTraceWithoutContext(string message, string category); + + [Conditional("VERBOSE")] + internal void Trace(string message, [CallerMemberName] string category = null) => OnTrace(message, category); + + [Conditional("VERBOSE")] + internal void Trace(bool condition, string message, [CallerMemberName] string category = null) + { + if (condition) OnTrace(message, category); + } + + [Conditional("VERBOSE")] + internal static void TraceWithoutContext(string message, [CallerMemberName] string category = null) => OnTraceWithoutContext(message, category); + + [Conditional("VERBOSE")] + internal static void TraceWithoutContext(bool condition, string message, [CallerMemberName] string category = null) + { + if (condition) OnTraceWithoutContext(message, category); + } + + [Conditional("VERBOSE")] + internal void OnMessageFaulted(Message msg, Exception fault, [CallerMemberName] string origin = default, [CallerFilePath] string path = default, [CallerLineNumber] int lineNumber = default) => + MessageFaulted?.Invoke(msg?.CommandAndKey, fault, $"{origin} ({path}#{lineNumber})"); + + [Conditional("VERBOSE")] + internal void OnInfoMessage(string message) => InfoMessage?.Invoke(message); + + [Conditional("VERBOSE")] + internal void OnClosing(bool complete) => Closing?.Invoke(complete); + + [Conditional("VERBOSE")] + internal void OnConnecting(EndPoint endpoint, ConnectionType connectionType) => Connecting?.Invoke(endpoint, connectionType); + + [Conditional("VERBOSE")] + internal void OnResurrecting(EndPoint endpoint, ConnectionType connectionType) => Resurrecting.Invoke(endpoint, connectionType); + + [Conditional("VERBOSE")] + internal void OnPreTransactionExec(Message message) => PreTransactionExec?.Invoke(message.CommandAndKey); + + [Conditional("VERBOSE")] + internal void OnTransactionLog(string message) => TransactionLog?.Invoke(message); + } +} diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index 
0cc31e76b..6e42daaed 100644 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -1504,33 +1504,6 @@ public IServer GetServer(EndPoint endpoint, object asyncState = null) return new RedisServer(this, server, asyncState); } - [Conditional("VERBOSE")] - internal void Trace(string message, [CallerMemberName] string category = null) - { - OnTrace(message, category); - } - - [Conditional("VERBOSE")] - internal void Trace(bool condition, string message, [CallerMemberName] string category = null) - { - if (condition) OnTrace(message, category); - } - - partial void OnTrace(string message, string category); - static partial void OnTraceWithoutContext(string message, string category); - - [Conditional("VERBOSE")] - internal static void TraceWithoutContext(string message, [CallerMemberName] string category = null) - { - OnTraceWithoutContext(message, category); - } - - [Conditional("VERBOSE")] - internal static void TraceWithoutContext(bool condition, string message, [CallerMemberName] string category = null) - { - if (condition) OnTraceWithoutContext(message, category); - } - /// /// The number of operations that have been performed on all connections /// @@ -1773,7 +1746,7 @@ internal async Task ReconfigureAsync(bool first, bool reconfigureAll, LogP { var server = servers[i]; var task = available[i]; - var bs = server.GetBridgeStatus(RedisCommand.PING); + var bs = server.GetBridgeStatus(ConnectionType.Interactive); log?.WriteLine($" Server[{i}] ({Format.ToString(server)}) Status: {task.Status} (inst: {bs.MessagesSinceLastHeartbeat}, qs: {bs.Connection.MessagesSentAwaitingResponse}, in: {bs.Connection.BytesAvailableOnSocket}, qu: {bs.MessagesSinceLastHeartbeat}, aw: {bs.IsWriterActive}, in-pipe: {bs.Connection.BytesInReadPipe}, out-pipe: {bs.Connection.BytesInWritePipe}, bw: {bs.BacklogStatus}, rs: {bs.Connection.ReadStatus}. 
ws: {bs.Connection.WriteStatus})"); } @@ -2174,16 +2147,14 @@ internal void UpdateClusterRange(ClusterConfiguration configuration) private IDisposable pulse; - internal ServerEndPoint SelectServer(Message message) - { - if (message == null) return null; - return ServerSelectionStrategy.Select(message); - } + internal ServerEndPoint SelectServer(Message message) => + message == null ? null : ServerSelectionStrategy.Select(message); - internal ServerEndPoint SelectServer(RedisCommand command, CommandFlags flags, in RedisKey key) - { - return ServerSelectionStrategy.Select(command, key, flags); - } + internal ServerEndPoint SelectServer(RedisCommand command, CommandFlags flags, in RedisKey key) => + ServerSelectionStrategy.Select(command, key, flags); + + internal ServerEndPoint SelectServer(RedisCommand command, CommandFlags flags, in RedisChannel channel) => + ServerSelectionStrategy.Select(command, channel, flags); private bool PrepareToPushMessageToBridge(Message message, ResultProcessor processor, IResultBox resultBox, ref ServerEndPoint server) { diff --git a/src/StackExchange.Redis/Enums/CommandFlags.cs b/src/StackExchange.Redis/Enums/CommandFlags.cs index f0a670d76..c1efc65c1 100644 --- a/src/StackExchange.Redis/Enums/CommandFlags.cs +++ b/src/StackExchange.Redis/Enums/CommandFlags.cs @@ -82,5 +82,7 @@ public enum CommandFlags NoScriptCache = 512, // 1024: used for timed-out; never user-specified, so not visible on the public API + + // 2048: Use subscription connection type; never user-specified, so not visible on the public API } } diff --git a/src/StackExchange.Redis/ExceptionFactory.cs b/src/StackExchange.Redis/ExceptionFactory.cs index 4cc274d24..fe7aabc3c 100644 --- a/src/StackExchange.Redis/ExceptionFactory.cs +++ b/src/StackExchange.Redis/ExceptionFactory.cs @@ -312,7 +312,7 @@ ServerEndPoint server // Add server data, if we have it if (server != null && message != null) { - var bs = server.GetBridgeStatus(message.Command); + var bs = 
server.GetBridgeStatus(message.IsForSubscriptionBridge ? ConnectionType.Subscription: ConnectionType.Interactive); switch (bs.Connection.ReadStatus) { @@ -338,7 +338,7 @@ ServerEndPoint server if (multiplexer.StormLogThreshold >= 0 && bs.Connection.MessagesSentAwaitingResponse >= multiplexer.StormLogThreshold && Interlocked.CompareExchange(ref multiplexer.haveStormLog, 1, 0) == 0) { - var log = server.GetStormLog(message.Command); + var log = server.GetStormLog(message); if (string.IsNullOrWhiteSpace(log)) Interlocked.Exchange(ref multiplexer.haveStormLog, 0); else Interlocked.Exchange(ref multiplexer.stormLogSnapshot, log); } diff --git a/src/StackExchange.Redis/Interfaces/ISubscriber.cs b/src/StackExchange.Redis/Interfaces/ISubscriber.cs index b479a8d8d..11e985a0b 100644 --- a/src/StackExchange.Redis/Interfaces/ISubscriber.cs +++ b/src/StackExchange.Redis/Interfaces/ISubscriber.cs @@ -100,8 +100,8 @@ public interface ISubscriber : IRedis EndPoint SubscribedEndpoint(RedisChannel channel); /// - /// Unsubscribe from a specified message channel; note; if no handler is specified, the subscription is cancelled regardless - /// of the subscribers; if a handler is specified, the subscription is only cancelled if this handler is the + /// Unsubscribe from a specified message channel; note; if no handler is specified, the subscription is canceled regardless + /// of the subscribers; if a handler is specified, the subscription is only canceled if this handler is the /// last handler remaining against the channel /// /// The channel that was subscribed to. 
@@ -128,8 +128,8 @@ public interface ISubscriber : IRedis Task UnsubscribeAllAsync(CommandFlags flags = CommandFlags.None); /// - /// Unsubscribe from a specified message channel; note; if no handler is specified, the subscription is cancelled regardless - /// of the subscribers; if a handler is specified, the subscription is only cancelled if this handler is the + /// Unsubscribe from a specified message channel; note; if no handler is specified, the subscription is canceled regardless + /// of the subscribers; if a handler is specified, the subscription is only canceled if this handler is the /// last handler remaining against the channel /// /// The channel that was subscribed to. diff --git a/src/StackExchange.Redis/Message.cs b/src/StackExchange.Redis/Message.cs index 86336d5c6..a7ecaf824 100644 --- a/src/StackExchange.Redis/Message.cs +++ b/src/StackExchange.Redis/Message.cs @@ -74,7 +74,8 @@ internal void SetBacklogState(int position, PhysicalConnection physical) private const CommandFlags AskingFlag = (CommandFlags)32, ScriptUnavailableFlag = (CommandFlags)256, - NeedsAsyncTimeoutCheckFlag = (CommandFlags)1024; + NeedsAsyncTimeoutCheckFlag = (CommandFlags)1024, + DemandSubscriptionConnection = (CommandFlags)2048; private const CommandFlags MaskMasterServerPreference = CommandFlags.DemandMaster | CommandFlags.DemandReplica @@ -670,6 +671,15 @@ internal void SetWriteTime() private int _writeTickCount; public int GetWriteTime() => Volatile.Read(ref _writeTickCount); + /// + /// Gets if this command should be sent over the subscription bridge. + /// + internal bool IsForSubscriptionBridge => (Flags & DemandSubscriptionConnection) != 0; + /// + /// Sends this command to the subscription connection rather than the interactive. 
+ /// + internal void SetForSubscriptionBridge() => Flags |= DemandSubscriptionConnection; + private void SetNeedsTimeoutCheck() => Flags |= NeedsAsyncTimeoutCheckFlag; internal bool HasAsyncTimedOut(int now, int timeoutMilliseconds, out int millisecondsTaken) { diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 0e25bd189..38abeb672 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -364,6 +364,7 @@ internal void KeepAlive() if (commandMap.IsAvailable(RedisCommand.PING) && features.PingOnSubscriber) { msg = Message.Create(-1, CommandFlags.FireAndForget, RedisCommand.PING); + msg.SetForSubscriptionBridge(); msg.SetSource(ResultProcessor.Tracer, null); } else if (commandMap.IsAvailable(RedisCommand.UNSUBSCRIBE)) diff --git a/src/StackExchange.Redis/RedisBatch.cs b/src/StackExchange.Redis/RedisBatch.cs index 6f4d70700..7abe234c5 100644 --- a/src/StackExchange.Redis/RedisBatch.cs +++ b/src/StackExchange.Redis/RedisBatch.cs @@ -30,7 +30,7 @@ public void Execute() FailNoServer(snapshot); throw ExceptionFactory.NoConnectionAvailable(multiplexer, message, server); } - var bridge = server.GetBridge(message.Command); + var bridge = server.GetBridge(message); if (bridge == null) { FailNoServer(snapshot); diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs index 22b9664b7..2d6788d25 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -1,8 +1,6 @@ using System; using System.Collections.Generic; -using System.Diagnostics; using System.Net; -using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; using Pipelines.Sockets.Unofficial; @@ -13,34 +11,11 @@ public partial class ConnectionMultiplexer { private readonly Dictionary subscriptions = new Dictionary(); - internal static void CompleteAsWorker(ICompletable completable) + internal 
int GetSubscriptionsCount() { - if (completable != null) ThreadPool.QueueUserWorkItem(s_CompleteAsWorker, completable); - } - - private static readonly WaitCallback s_CompleteAsWorker = s => ((ICompletable)s).TryComplete(true); - - internal static bool TryCompleteHandler(EventHandler handler, object sender, T args, bool isAsync) where T : EventArgs, ICompletable - { - if (handler == null) return true; - if (isAsync) - { - if (handler.IsSingle()) - { - try { handler(sender, args); } catch { } - } - else - { - foreach (EventHandler sub in handler.AsEnumerable()) - { - try { sub(sender, args); } catch { } - } - } - return true; - } - else + lock (subscriptions) { - return false; + return subscriptions.Count; } } @@ -106,7 +81,7 @@ internal void OnMessage(in RedisChannel subscription, in RedisChannel channel, i } } if (queues != null) ChannelMessageQueue.WriteAll(ref queues, channel, payload); - if (completable != null && !completable.TryComplete(false)) ConnectionMultiplexer.CompleteAsWorker(completable); + if (completable != null && !completable.TryComplete(false)) CompleteAsWorker(completable); } internal Task RemoveAllSubscriptions(CommandFlags flags, object asyncState) @@ -169,7 +144,7 @@ internal void ResendSubscriptions(ServerEndPoint server) var server = GetSubscribedServer(channel); if (server != null) return server.IsConnected; - server = SelectServer(RedisCommand.SUBSCRIBE, CommandFlags.DemandMaster, default(RedisKey)); + server = SelectServer(RedisCommand.SUBSCRIBE, CommandFlags.DemandMaster, channel); return server?.IsConnected == true; } @@ -221,7 +196,7 @@ public bool Remove(Action handler, ChannelMessageQueue [System.Diagnostics.CodeAnalysis.SuppressMessage("Usage", "RCS1210:Return completed task instead of returning null.", Justification = "Intentional for efficient success check")] public Task SubscribeToServer(ConnectionMultiplexer multiplexer, in RedisChannel channel, CommandFlags flags, object asyncState, bool internalCall) { - var selected = 
multiplexer.SelectServer(RedisCommand.SUBSCRIBE, flags, default(RedisKey)); + var selected = multiplexer.SelectServer(RedisCommand.SUBSCRIBE, flags, channel); var bridge = selected?.GetBridge(ConnectionType.Subscription, true); if (bridge == null) return null; @@ -348,51 +323,6 @@ internal void GetSubscriberCounts(out int handlers, out int queues) } } } - - internal string GetConnectionName(EndPoint endPoint, ConnectionType connectionType) - => GetServerEndPoint(endPoint)?.GetBridge(connectionType, false)?.PhysicalName; - - internal event Action MessageFaulted; - internal event Action Closing; - internal event Action PreTransactionExec, TransactionLog, InfoMessage; - internal event Action Connecting; - internal event Action Resurrecting; - - [Conditional("VERBOSE")] - internal void OnMessageFaulted(Message msg, Exception fault, [CallerMemberName] string origin = default, [CallerFilePath] string path = default, [CallerLineNumber] int lineNumber = default) - { - MessageFaulted?.Invoke(msg?.CommandAndKey, fault, $"{origin} ({path}#{lineNumber})"); - } - [Conditional("VERBOSE")] - internal void OnInfoMessage(string message) - { - InfoMessage?.Invoke(message); - } - [Conditional("VERBOSE")] - internal void OnClosing(bool complete) - { - Closing?.Invoke(complete); - } - [Conditional("VERBOSE")] - internal void OnConnecting(EndPoint endpoint, ConnectionType connectionType) - { - Connecting?.Invoke(endpoint, connectionType); - } - [Conditional("VERBOSE")] - internal void OnResurrecting(EndPoint endpoint, ConnectionType connectionType) - { - Resurrecting.Invoke(endpoint, connectionType); - } - [Conditional("VERBOSE")] - internal void OnPreTransactionExec(Message message) - { - PreTransactionExec?.Invoke(message.CommandAndKey); - } - [Conditional("VERBOSE")] - internal void OnTransactionLog(string message) - { - TransactionLog?.Invoke(message); - } } internal sealed class RedisSubscriber : RedisBase, ISubscriber diff --git a/src/StackExchange.Redis/ServerEndPoint.cs 
b/src/StackExchange.Redis/ServerEndPoint.cs index f41e360a3..ab2889e65 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -74,6 +74,8 @@ public ServerEndPoint(ConnectionMultiplexer multiplexer, EndPoint endpoint) public bool IsConnected => interactive?.IsConnected == true; + public bool IsSubscriberConnected => subscription?.IsConnected == true; + public bool IsConnecting => interactive?.IsConnecting == true; private readonly List> _pendingConnectionMonitors = new List>(); @@ -92,7 +94,7 @@ async Task IfConnectedAsync(LogProxy log, bool sendTracerIfConnected, bo } if (sendTracerIfConnected) { - await SendTracer(log).ForAwait(); + await SendTracerAsync(log).ForAwait(); } log?.WriteLine($"{Format.ToString(this)}: OnConnectedAsync already connected end"); return "Already connected"; @@ -209,6 +211,28 @@ public PhysicalBridge GetBridge(ConnectionType type, bool create = true, LogProx }; } + public PhysicalBridge GetBridge(Message message, bool create = true) + { + if (isDisposed) return null; + + // Subscription commands go to a specific bridge - so we need to set that up. + // There are other commands we need to send to the right connection (e.g. subscriber PING with an explicit SetForSubscriptionBridge call), + // but these always go subscriber. + switch (message.Command) + { + case RedisCommand.SUBSCRIBE: + case RedisCommand.UNSUBSCRIBE: + case RedisCommand.PSUBSCRIBE: + case RedisCommand.PUNSUBSCRIBE: + message.SetForSubscriptionBridge(); + break; + } + + return message.IsForSubscriptionBridge + ? subscription ?? (create ? subscription = CreateBridge(ConnectionType.Subscription, null) : null) + : interactive ?? (create ? 
interactive = CreateBridge(ConnectionType.Interactive, null) : null); + } + public PhysicalBridge GetBridge(RedisCommand command, bool create = true) { if (isDisposed) return null; @@ -281,9 +305,9 @@ public void SetUnselectable(UnselectableFlags flags) public override string ToString() => Format.ToString(EndPoint); [Obsolete("prefer async")] - public WriteResult TryWriteSync(Message message) => GetBridge(message.Command)?.TryWriteSync(message, isReplica) ?? WriteResult.NoConnectionAvailable; + public WriteResult TryWriteSync(Message message) => GetBridge(message)?.TryWriteSync(message, isReplica) ?? WriteResult.NoConnectionAvailable; - public ValueTask TryWriteAsync(Message message) => GetBridge(message.Command)?.TryWriteAsync(message, isReplica) ?? new ValueTask(WriteResult.NoConnectionAvailable); + public ValueTask TryWriteAsync(Message message) => GetBridge(message)?.TryWriteAsync(message, isReplica) ?? new ValueTask(WriteResult.NoConnectionAvailable); internal void Activate(ConnectionType type, LogProxy log) { @@ -445,11 +469,11 @@ internal ServerCounters GetCounters() return counters; } - internal BridgeStatus GetBridgeStatus(RedisCommand command) + internal BridgeStatus GetBridgeStatus(ConnectionType connectionType) { try { - return GetBridge(command, false)?.GetStatus() ?? BridgeStatus.Zero; + return GetBridge(connectionType, false)?.GetStatus() ?? 
BridgeStatus.Zero; } catch (Exception ex) { // only needs to be best efforts @@ -484,9 +508,9 @@ internal byte[] GetScriptHash(string script, RedisCommand command) return found; } - internal string GetStormLog(RedisCommand command) + internal string GetStormLog(Message message) { - var bridge = GetBridge(command); + var bridge = GetBridge(message); return bridge?.GetStormLog(); } @@ -686,7 +710,7 @@ internal void OnHeartbeat() } } - internal Task WriteDirectAsync(Message message, ResultProcessor processor, object asyncState = null, PhysicalBridge bridge = null) + internal Task WriteDirectAsync(Message message, ResultProcessor processor, PhysicalBridge bridge = null) { static async Task Awaited(ServerEndPoint @this, Message message, ValueTask write, TaskCompletionSource tcs) { @@ -699,9 +723,9 @@ static async Task Awaited(ServerEndPoint @this, Message message, ValueTask.Create(out var tcs, asyncState); + var source = TaskResultBox.Create(out var tcs, null); message.SetSource(processor, source); - if (bridge == null) bridge = GetBridge(message.Command); + if (bridge == null) bridge = GetBridge(message); WriteResult result; if (bridge == null) @@ -733,7 +757,7 @@ internal void WriteDirectFireAndForgetSync(Message message, ResultProcessor SendTracer(LogProxy log = null) + internal Task SendTracerAsync(LogProxy log = null) { var msg = GetTracerMessage(false); msg = LoggingMessage.Create(log, msg); @@ -783,6 +807,9 @@ internal string Summary() return sb.ToString(); } + /// + /// Write the message directly to the pipe or fail...will not queue. 
+ /// internal ValueTask WriteDirectOrQueueFireAndForgetAsync(PhysicalConnection connection, Message message, ResultProcessor processor) { static async ValueTask Awaited(ValueTask l_result) => await l_result.ForAwait(); @@ -794,7 +821,7 @@ internal ValueTask WriteDirectOrQueueFireAndForgetAsync(PhysicalConnection co if (connection == null) { Multiplexer.Trace($"{Format.ToString(this)}: Enqueue (async): " + message); - result = GetBridge(message.Command).TryWriteAsync(message, isReplica); + result = GetBridge(message).TryWriteAsync(message, isReplica); } else { @@ -886,7 +913,7 @@ private async Task HandshakeAsync(PhysicalConnection connection, LogProxy log) log?.WriteLine($"{Format.ToString(this)}: Sending critical tracer (handshake): {tracer.CommandAndKey}"); await WriteDirectOrQueueFireAndForgetAsync(connection, tracer, ResultProcessor.EstablishConnection).ForAwait(); - // note: this **must** be the last thing on the subscription handshake, because after this + // Note: this **must** be the last thing on the subscription handshake, because after this // we will be in subscriber mode: regular commands cannot be sent if (connType == ConnectionType.Subscription) { @@ -894,6 +921,7 @@ private async Task HandshakeAsync(PhysicalConnection connection, LogProxy log) if (configChannel != null) { msg = Message.Create(-1, CommandFlags.FireAndForget, RedisCommand.SUBSCRIBE, (RedisChannel)configChannel); + // Note: this is NOT internal, we want it to queue in a backlog for sending when ready if necessary await WriteDirectOrQueueFireAndForgetAsync(connection, msg, ResultProcessor.TrackSubscriptions).ForAwait(); } } diff --git a/src/StackExchange.Redis/ServerSelectionStrategy.cs b/src/StackExchange.Redis/ServerSelectionStrategy.cs index ac9e664ca..cb4bb954c 100644 --- a/src/StackExchange.Redis/ServerSelectionStrategy.cs +++ b/src/StackExchange.Redis/ServerSelectionStrategy.cs @@ -58,19 +58,26 @@ public ServerSelectionStrategy(ConnectionMultiplexer multiplexer) internal static 
int TotalSlots => RedisClusterSlotCount; /// - /// Computes the hash-slot that would be used by the given key + /// Computes the hash-slot that would be used by the given key. /// /// The to determine a slot ID for. public int HashSlot(in RedisKey key) - => ServerType == ServerType.Standalone ? NoSlot : GetClusterSlot(key); + => ServerType == ServerType.Standalone || key.IsNull ? NoSlot : GetClusterSlot((byte[])key); - private static unsafe int GetClusterSlot(in RedisKey key) + /// + /// Computes the hash-slot that would be used by the given channel. + /// + /// The to determine a slot ID for. + public int HashSlot(in RedisChannel channel) + => ServerType == ServerType.Standalone || channel.IsNull ? NoSlot : GetClusterSlot((byte[])channel); + + /// + /// HASH_SLOT = CRC16(key) mod 16384 + /// + private static unsafe int GetClusterSlot(byte[] blob) { - //HASH_SLOT = CRC16(key) mod 16384 - if (key.IsNull) return NoSlot; unchecked { - var blob = (byte[])key; fixed (byte* ptr = blob) { fixed (ushort* crc16tab = s_crc16tab) @@ -116,6 +123,12 @@ public ServerEndPoint Select(RedisCommand command, in RedisKey key, CommandFlags return Select(slot, command, flags); } + public ServerEndPoint Select(RedisCommand command, in RedisChannel channel, CommandFlags flags) + { + int slot = ServerType == ServerType.Cluster ? 
HashSlot(channel) : NoSlot; + return Select(slot, command, flags); + } + public bool TryResend(int hashSlot, Message message, EndPoint endpoint, bool isMoved) { try diff --git a/tests/StackExchange.Redis.Tests/AsyncTests.cs b/tests/StackExchange.Redis.Tests/AsyncTests.cs index 5ee26f815..4dd36670b 100644 --- a/tests/StackExchange.Redis.Tests/AsyncTests.cs +++ b/tests/StackExchange.Redis.Tests/AsyncTests.cs @@ -19,7 +19,7 @@ public void AsyncTasksReportFailureIfServerUnavailable() { SetExpectedAmbientFailureCount(-1); // this will get messy - using (var conn = Create(allowAdmin: true)) + using (var conn = Create(allowAdmin: true, shared: false)) { var server = conn.GetServer(TestConfig.Current.MasterServer, TestConfig.Current.MasterPort); diff --git a/tests/StackExchange.Redis.Tests/Config.cs b/tests/StackExchange.Redis.Tests/Config.cs index a2d0d7034..2a5cf6625 100644 --- a/tests/StackExchange.Redis.Tests/Config.cs +++ b/tests/StackExchange.Redis.Tests/Config.cs @@ -14,7 +14,7 @@ namespace StackExchange.Redis.Tests { public class Config : TestBase { - public Version DefaultVersion = new (2, 8, 0); + public Version DefaultVersion = new (3, 0, 0); public Version DefaultAzureVersion = new (4, 0, 0); public Config(ITestOutputHelper output) : base(output) { } diff --git a/tests/StackExchange.Redis.Tests/ConnectFailTimeout.cs b/tests/StackExchange.Redis.Tests/ConnectFailTimeout.cs index c52082d12..73af84fa4 100644 --- a/tests/StackExchange.Redis.Tests/ConnectFailTimeout.cs +++ b/tests/StackExchange.Redis.Tests/ConnectFailTimeout.cs @@ -13,7 +13,7 @@ public ConnectFailTimeout(ITestOutputHelper output) : base (output) { } public async Task NoticesConnectFail() { SetExpectedAmbientFailureCount(-1); - using (var conn = Create(allowAdmin: true)) + using (var conn = Create(allowAdmin: true, shared: false)) { var server = conn.GetServer(conn.GetEndPoints()[0]); diff --git a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs 
b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs index 9d9c88f8c..fb0b84d21 100644 --- a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs +++ b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs @@ -105,17 +105,17 @@ public async Task Issue922_ReconnectRaised() int failCount = 0, restoreCount = 0; - using (var muxer = ConnectionMultiplexer.Connect(config, log: Writer)) + using (var muxer = ConnectionMultiplexer.Connect(config)) { muxer.ConnectionFailed += (s, e) => { Interlocked.Increment(ref failCount); - Log($"Connection Failed ({e.ConnectionType},{e.FailureType}): {e.Exception}"); + Log($"Connection Failed ({e.ConnectionType}, {e.FailureType}): {e.Exception}"); }; muxer.ConnectionRestored += (s, e) => { Interlocked.Increment(ref restoreCount); - Log($"Connection Restored ({e.ConnectionType},{e.FailureType}): {e.Exception}"); + Log($"Connection Restored ({e.ConnectionType}, {e.FailureType})"); }; muxer.GetDatabase(); diff --git a/tests/StackExchange.Redis.Tests/ConnectionShutdown.cs b/tests/StackExchange.Redis.Tests/ConnectionShutdown.cs index a4e720772..d75054ca4 100644 --- a/tests/StackExchange.Redis.Tests/ConnectionShutdown.cs +++ b/tests/StackExchange.Redis.Tests/ConnectionShutdown.cs @@ -14,7 +14,7 @@ public ConnectionShutdown(ITestOutputHelper output) : base(output) { } [Fact(Skip = "Unfriendly")] public async Task ShutdownRaisesConnectionFailedAndRestore() { - using (var conn = Create(allowAdmin: true)) + using (var conn = Create(allowAdmin: true, shared: false)) { int failed = 0, restored = 0; Stopwatch watch = Stopwatch.StartNew(); diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 0e4131913..646041055 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -1,11 +1,13 @@ using System; using System.Collections.Generic; using System.Diagnostics; +using System.Linq; using System.Text; using System.Threading; using 
System.Threading.Channels; using System.Threading.Tasks; using StackExchange.Redis.Maintenance; +using StackExchange.Redis.Profiling; using Xunit; using Xunit.Abstractions; // ReSharper disable AccessToModifiedClosure @@ -55,7 +57,7 @@ await UntilCondition(TimeSpan.FromSeconds(10), [InlineData("Foo:", true, "f")] public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string breaker) { - using (var muxer = Create(channelPrefix: channelPrefix)) + using (var muxer = Create(channelPrefix: channelPrefix, log: Writer)) { var pub = GetAnyMaster(muxer); var sub = muxer.GetSubscriber(); @@ -91,6 +93,7 @@ public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string br await PingAsync(muxer, pub, sub, 3).ForAwait(); + await UntilCondition(TimeSpan.FromSeconds(5), () => received.Count == 1); lock (received) { Assert.Single(received); @@ -124,7 +127,7 @@ public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string br [Fact] public async Task TestBasicPubSubFireAndForget() { - using (var muxer = Create()) + using (var muxer = Create(log: Writer)) { var pub = GetAnyMaster(muxer); var sub = muxer.GetSubscriber(); @@ -155,6 +158,7 @@ public async Task TestBasicPubSubFireAndForget() var count = sub.Publish(key, "def", CommandFlags.FireAndForget); await PingAsync(muxer, pub, sub).ForAwait(); + await UntilCondition(TimeSpan.FromSeconds(5), () => received.Count == 1); lock (received) { Assert.Single(received); @@ -182,9 +186,7 @@ private static async Task PingAsync(IConnectionMultiplexer muxer, IServer pub, I // way to prove that is to use TPL objects var t1 = sub.PingAsync(); var t2 = pub.PingAsync(); - await Task.Delay(100).ForAwait(); // especially useful when testing any-order mode - - if (!Task.WaitAll(new[] { t1, t2 }, muxer.TimeoutMilliseconds * 2)) throw new TimeoutException(); + await Task.WhenAll(t1, t2).ForAwait(); } } @@ -220,6 +222,7 @@ public async Task TestPatternPubSub() var count = sub.Publish("abc", "def"); await 
PingAsync(muxer, pub, sub).ForAwait(); + await UntilCondition(TimeSpan.FromSeconds(5), () => received.Count == 1); lock (received) { Assert.Single(received); @@ -348,7 +351,7 @@ await sub.SubscribeAsync(channel, (_, val) => [Fact] public async Task PubSubGetAllCorrectOrder() { - using (var muxer = Create(configuration: TestConfig.Current.RemoteServerAndPort, syncTimeout: 20000)) + using (var muxer = Create(configuration: TestConfig.Current.RemoteServerAndPort, syncTimeout: 20000, log: Writer)) { var sub = muxer.GetSubscriber(); RedisChannel channel = Me(); @@ -520,6 +523,9 @@ public async Task PubSubGetAllCorrectOrder_OnMessage_Async() }); await sub.PingAsync().ForAwait(); + // Give a delay between subscriptions and when we try to publish to be safe + await Task.Delay(1000).ForAwait(); + lock (syncLock) { for (int i = 0; i < count; i++) @@ -743,8 +749,10 @@ public async Task AzureRedisEventsAutomaticSubscribe() [Fact] public async Task SubscriptionsSurviveConnectionFailureAsync() { - using (var muxer = Create(allowAdmin: true, shared: false)) + var session = new ProfilingSession(); + using (var muxer = Create(allowAdmin: true, shared: false, syncTimeout: 1000) as ConnectionMultiplexer) { + muxer.RegisterProfiler(() => session); RedisChannel channel = Me(); var sub = muxer.GetSubscriber(); int counter = 0; @@ -752,23 +760,89 @@ await sub.SubscribeAsync(channel, delegate { Interlocked.Increment(ref counter); }).ConfigureAwait(false); + + var profile1 = session.FinishProfiling(); + foreach (var command in profile1) + { + Log($"{command.EndPoint}: {command}"); + } + // We shouldn't see the initial connection here + Assert.Equal(0, profile1.Count(p => p.Command == nameof(RedisCommand.SUBSCRIBE))); + + Assert.Equal(1, muxer.GetSubscriptionsCount()); + await Task.Delay(200).ConfigureAwait(false); + await sub.PublishAsync(channel, "abc").ConfigureAwait(false); sub.Ping(); await Task.Delay(200).ConfigureAwait(false); - Assert.Equal(1, Thread.VolatileRead(ref counter)); + + 
var counter1 = Thread.VolatileRead(ref counter); + Log($"Expecting 1 messsage, got {counter1}"); + Assert.Equal(1, counter1); + var server = GetServer(muxer); - Assert.Equal(1, server.GetCounters().Subscription.SocketCount); + var socketCount = server.GetCounters().Subscription.SocketCount; + Log($"Expecting 1 socket, got {socketCount}"); + Assert.Equal(1, socketCount); + + // We might fail both connections or just the primary in the time period + SetExpectedAmbientFailureCount(-1); + // Make sure we fail all the way + muxer.AllowConnect = false; + Log("Failing connection"); + // Fail all connections server.SimulateConnectionFailure(SimulatedFailureType.All); - SetExpectedAmbientFailureCount(2); - await Task.Delay(200).ConfigureAwait(false); - sub.Ping(); - Assert.Equal(2, server.GetCounters().Subscription.SocketCount); - await sub.PublishAsync(channel, "abc").ConfigureAwait(false); - await Task.Delay(200).ConfigureAwait(false); + // Trigger failure + Assert.Throws(() => sub.Ping()); + Assert.False(sub.IsConnected(channel)); + + // Now reconnect... + muxer.AllowConnect = true; + Log("Waiting on reconnect"); + // Wait until we're reconnected + await UntilCondition(TimeSpan.FromSeconds(10), () => sub.IsConnected(channel)); + Log("Reconnected"); + // Ensure we're reconnected + Assert.True(sub.IsConnected(channel)); + + // And time to resubscribe... 
+ await Task.Delay(1000).ConfigureAwait(false); + + // Ensure we've sent the subscribe command after reconnecting + var profile2 = session.FinishProfiling(); + foreach (var command in profile2) + { + Log($"{command.EndPoint}: {command}"); + } + //Assert.Equal(1, profile2.Count(p => p.Command == nameof(RedisCommand.SUBSCRIBE))); + + Log($"Issuing ping after reconnected"); sub.Ping(); - Assert.Equal(2, Thread.VolatileRead(ref counter)); + Assert.Equal(1, muxer.GetSubscriptionsCount()); + + Log("Publishing"); + var published = await sub.PublishAsync(channel, "abc").ConfigureAwait(false); + + Log($"Published to {published} subscriber(s)."); + Assert.Equal(1, published); + + // Give it a few seconds to get our messages + Log("Waiting for 2 messages"); + await UntilCondition(TimeSpan.FromSeconds(5), () => Thread.VolatileRead(ref counter) == 2); + + var counter2 = Thread.VolatileRead(ref counter); + Log($"Expecting 2 messsages, got {counter2}"); + Assert.Equal(2, counter2); + + // Log all commands at the end + Log("All commands since connecting:"); + var profile3 = session.FinishProfiling(); + foreach (var command in profile3) + { + Log($"{command.EndPoint}: {command}"); + } } } } diff --git a/tests/StackExchange.Redis.Tests/TestBase.cs b/tests/StackExchange.Redis.Tests/TestBase.cs index a74521acb..0cca4e5b7 100644 --- a/tests/StackExchange.Redis.Tests/TestBase.cs +++ b/tests/StackExchange.Redis.Tests/TestBase.cs @@ -128,6 +128,7 @@ protected void OnConnectionFailed(object sender, ConnectionFailedEventArgs e) { privateExceptions.Add($"{Time()}: Connection failed ({e.FailureType}): {EndPointCollection.ToString(e.EndPoint)}/{e.ConnectionType}: {e.Exception}"); } + Log($"Connection Failed ({e.ConnectionType},{e.FailureType}): {e.Exception}"); } protected void OnInternalError(object sender, InternalErrorEventArgs e) @@ -285,6 +286,10 @@ internal virtual IInternalConnectionMultiplexer Create( caller); muxer.InternalError += OnInternalError; muxer.ConnectionFailed += 
OnConnectionFailed; + muxer.ConnectionRestored += (s, e) => + { + Log($"Connection Restored ({e.ConnectionType},{e.FailureType}): {e.Exception}"); + }; return muxer; } From 6ecde2a5ed3f696ae0eb390b52342683980aa157 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Thu, 20 Jan 2022 11:15:14 -0500 Subject: [PATCH 072/117] Include PING routing --- src/StackExchange.Redis/RedisSubscriber.cs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs index 2d6788d25..9b2b9c18d 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -372,16 +372,20 @@ private Message CreatePingMessage(CommandFlags flags, out ServerEndPoint server) catch { } } + Message msg; if (usePing) { - return ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.PING); + msg = ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.PING); } else { // can't use regular PING, but we can unsubscribe from something random that we weren't even subscribed to... 
RedisValue channel = multiplexer.UniqueId; - return ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.UNSUBSCRIBE, channel); + msg = ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.UNSUBSCRIBE, channel); } + // Ensure the ping is sent over the intended subscriver connection, which wouldn't happen in GetBridge() by default with PING; + msg.SetForSubscriptionBridge(); + return msg; } public long Publish(RedisChannel channel, RedisValue message, CommandFlags flags = CommandFlags.None) From f91e4c5e8c66850edf5c9555700d1abb80b8852a Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Thu, 20 Jan 2022 11:18:10 -0500 Subject: [PATCH 073/117] Revert testing change --- tests/StackExchange.Redis.Tests/PubSub.cs | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 646041055..2ca92a430 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -523,9 +523,6 @@ public async Task PubSubGetAllCorrectOrder_OnMessage_Async() }); await sub.PingAsync().ForAwait(); - // Give a delay between subscriptions and when we try to publish to be safe - await Task.Delay(1000).ForAwait(); - lock (syncLock) { for (int i = 0; i < count; i++) From daa1b9c4ca375b56e4957e89b658ab7be50206b9 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Thu, 20 Jan 2022 11:54:41 -0500 Subject: [PATCH 074/117] Revert that bandaid test --- tests/StackExchange.Redis.Tests/PubSub.cs | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 646041055..2ca92a430 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -523,9 +523,6 @@ public async Task PubSubGetAllCorrectOrder_OnMessage_Async() }); await sub.PingAsync().ForAwait(); - // Give a delay between subscriptions and when we try to publish to be safe - await 
Task.Delay(1000).ForAwait(); - lock (syncLock) { for (int i = 0; i < count; i++) From 70e1735ade62e112a3af4976d71236a3fde07135 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Thu, 20 Jan 2022 12:10:38 -0500 Subject: [PATCH 075/117] Nope. --- tests/StackExchange.Redis.Tests/PubSub.cs | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 646041055..2ca92a430 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -523,9 +523,6 @@ public async Task PubSubGetAllCorrectOrder_OnMessage_Async() }); await sub.PingAsync().ForAwait(); - // Give a delay between subscriptions and when we try to publish to be safe - await Task.Delay(1000).ForAwait(); - lock (syncLock) { for (int i = 0; i < count; i++) From a814231c133a3fae5999388c3bd37e5df610a3b7 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Thu, 20 Jan 2022 13:47:50 -0500 Subject: [PATCH 076/117] Bits --- src/StackExchange.Redis/RedisSubscriber.cs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs index 6e36c5a92..0de5ccc5f 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -27,7 +27,7 @@ internal bool GetSubscriberCounts(in RedisChannel channel, out int handlers, out internal async Task AddSubscriptionAsync(RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags, object asyncState) { - if (handler != null | queue != null) + if (handler != null || queue != null) { if (!subscriptions.TryGetValue(channel, out Subscription sub)) { @@ -207,9 +207,6 @@ public async Task SubscribeToServerAsync(ConnectionMultiplexer multiplexer { var command = channel.IsPatternBased ? 
RedisCommand.PSUBSCRIBE : RedisCommand.SUBSCRIBE; var selected = multiplexer.SelectServer(command, flags, channel); - // TODO: look at this case - var bridge = selected?.GetBridge(ConnectionType.Subscription, true); - if (bridge == null) return false; // Do we have a server already? And is it connected? Then bail out. if (CurrentServer?.IsSubscriberConnected == true) From 1d4b4ad63f80305ca8e9cebc560420d88907fe20 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Fri, 21 Jan 2022 17:20:20 -0500 Subject: [PATCH 077/117] Sync work stop commit (moving to laptop!) --- src/StackExchange.Redis/RedisSubscriber.cs | 76 +++++++++++++++++----- 1 file changed, 61 insertions(+), 15 deletions(-) diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs index 0de5ccc5f..36000e9ed 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -25,6 +25,24 @@ internal bool GetSubscriberCounts(in RedisChannel channel, out int handlers, out return false; } + internal bool AddSubscription(RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) + { + if (handler != null || queue != null) + { + if (!subscriptions.TryGetValue(channel, out Subscription sub)) + { + sub = new Subscription(flags); + subscriptions.TryAdd(channel, sub); + if (!sub.SubscribeToServer(this, channel, flags, false)) + { + return false; + } + } + sub.Add(handler, queue); + } + return true; + } + internal async Task AddSubscriptionAsync(RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags, object asyncState) { if (handler != null || queue != null) @@ -146,12 +164,7 @@ internal sealed class Subscription public Subscription(CommandFlags flags) => Flags = flags; - private Message GetMessage( - RedisChannel channel, - RedisCommand command, - object asyncState, - bool internalCall, - out TaskCompletionSource taskSource) + private Message GetMessage(RedisChannel channel, 
RedisCommand command, bool internalCall) { var msg = Message.Create(-1, Flags, command, channel); msg.SetForSubscriptionBridge(); @@ -159,9 +172,6 @@ private Message GetMessage( { msg.SetInternalCall(); } - - var source = TaskResultBox.Create(out taskSource, asyncState); - msg.SetSource(ResultProcessor.TrackSubscriptions, source); return msg; } @@ -203,10 +213,34 @@ public bool Remove(Action handler, ChannelMessageQueue return _handlers == null & _queues == null; } + public bool SubscribeToServer(ConnectionMultiplexer multiplexer, RedisChannel channel, CommandFlags flags, bool internalCall) + { + ServerEndPoint selected = null; + // Do we have a server already? And is it connected? Then bail out. + if (CurrentServer?.IsSubscriberConnected == true) + { + return false; + } + // Otherwise try and subscribe on the server side + try + { + var command = channel.IsPatternBased ? RedisCommand.PSUBSCRIBE : RedisCommand.SUBSCRIBE; + selected = multiplexer.SelectServer(command, flags, channel); + + var message = GetMessage(channel, command, internalCall); + return multiplexer.ExecuteSyncImpl(message, ResultProcessor.TrackSubscriptions, selected); + } + catch + { + // If there was an exception, clear the owner + Interlocked.CompareExchange(ref CurrentServer, null, selected); + throw; + } + } + public async Task SubscribeToServerAsync(ConnectionMultiplexer multiplexer, RedisChannel channel, CommandFlags flags, object asyncState, bool internalCall) { - var command = channel.IsPatternBased ? RedisCommand.PSUBSCRIBE : RedisCommand.SUBSCRIBE; - var selected = multiplexer.SelectServer(command, flags, channel); + ServerEndPoint selected = null; // Do we have a server already? And is it connected? Then bail out. 
if (CurrentServer?.IsSubscriberConnected == true) @@ -216,7 +250,13 @@ public async Task SubscribeToServerAsync(ConnectionMultiplexer multiplexer // Otherwise try and subscribe on the server side try { - var message = GetMessage(channel, command, asyncState, internalCall, out var taskSource); + var command = channel.IsPatternBased ? RedisCommand.PSUBSCRIBE : RedisCommand.SUBSCRIBE; + selected = multiplexer.SelectServer(command, flags, channel); + + var source = TaskResultBox.Create(out var taskSource, asyncState); + var message = GetMessage(channel, command, internalCall); + message.SetSource(ResultProcessor.TrackSubscriptions, source); + // TODO: Could move this entirely into a processor, e.g. the CurrentServer removal we need below var success = await multiplexer.ExecuteAsyncImpl(message, ResultProcessor.TrackSubscriptions, asyncState, selected); if (!success) @@ -239,7 +279,10 @@ public async Task UnsubscribeFromServerAsync(RedisChannel channel, object var oldOwner = Interlocked.Exchange(ref CurrentServer, null); if (oldOwner != null) { - var message = GetMessage(channel, command, asyncState, internalCall, out var taskSource); + var source = TaskResultBox.Create(out var taskSource, asyncState); + var message = GetMessage(channel, command, internalCall); + message.SetSource(ResultProcessor.TrackSubscriptions, source); + var success = await oldOwner.Multiplexer.ExecuteAsyncImpl(message, ResultProcessor.TrackSubscriptions, asyncState, oldOwner); if (!success) { @@ -387,8 +430,11 @@ void ISubscriber.Subscribe(RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) { - var task = SubscribeAsync(channel, handler, queue, flags); - if ((flags & CommandFlags.FireAndForget) == 0) Wait(task); + if (channel.IsNullOrEmpty) + { + throw new ArgumentNullException(nameof(channel)); + } + multiplexer.AddSubscription(channel, handler, queue, flags); } public ChannelMessageQueue Subscribe(RedisChannel channel, CommandFlags flags = CommandFlags.None) 
From 5de45a26019911766c4e3e3282237e0ed42a667b Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 13:48:07 -0500 Subject: [PATCH 078/117] Tests: profiler logging made easier --- .../Profiling/ProfiledCommand.cs | 23 +++++++++---------- .../Helpers/TextWriterOutputHelper.cs | 14 +++++++++++ tests/StackExchange.Redis.Tests/TestBase.cs | 11 +++++++++ .../TestExtensions.cs | 15 ++++++++++++ 4 files changed, 51 insertions(+), 12 deletions(-) create mode 100644 tests/StackExchange.Redis.Tests/TestExtensions.cs diff --git a/src/StackExchange.Redis/Profiling/ProfiledCommand.cs b/src/StackExchange.Redis/Profiling/ProfiledCommand.cs index 5f4ff899f..4c1d366f9 100644 --- a/src/StackExchange.Redis/Profiling/ProfiledCommand.cs +++ b/src/StackExchange.Redis/Profiling/ProfiledCommand.cs @@ -55,6 +55,7 @@ private static TimeSpan GetElapsedTime(long timestampDelta) private long RequestSentTimeStamp; private long ResponseReceivedTimeStamp; private long CompletedTimeStamp; + private ConnectionType? ConnectionType; private readonly ProfilingSession PushToWhenFinished; @@ -86,7 +87,11 @@ public void SetMessage(Message msg) MessageCreatedTimeStamp = msg.CreatedTimestamp; } - public void SetEnqueued() => SetTimestamp(ref EnqueuedTimeStamp); + public void SetEnqueued(ConnectionType? connType) + { + SetTimestamp(ref EnqueuedTimeStamp); + ConnectionType = connType; + } public void SetRequestSent() => SetTimestamp(ref RequestSentTimeStamp); @@ -117,16 +122,10 @@ public void SetCompleted() } public override string ToString() => -$@"EndPoint = {EndPoint} -Db = {Db} -Command = {Command} -CommandCreated = {CommandCreated:u} -CreationToEnqueued = {CreationToEnqueued} -EnqueuedToSending = {EnqueuedToSending} -SentToResponse = {SentToResponse} -ResponseToCompletion = {ResponseToCompletion} -ElapsedTime = {ElapsedTime} -Flags = {Flags} -RetransmissionOf = ({RetransmissionOf?.ToString() ?? 
"nothing"})"; +$@"{Command} (DB: {Db}, Flags: {Flags}) + EndPoint = {EndPoint} ({ConnectionType}) + Created = {CommandCreated:HH:mm:ss.ffff} + ElapsedTime = {ElapsedTime.TotalMilliseconds} ms (CreationToEnqueued: {CreationToEnqueued.TotalMilliseconds} ms, EnqueuedToSending: {EnqueuedToSending.TotalMilliseconds} ms, SentToResponse: {SentToResponse.TotalMilliseconds} ms, ResponseToCompletion = {ResponseToCompletion.TotalMilliseconds} ms){(RetransmissionOf != null ? @" + RetransmissionOf = " + RetransmissionOf : "")}"; } } diff --git a/tests/StackExchange.Redis.Tests/Helpers/TextWriterOutputHelper.cs b/tests/StackExchange.Redis.Tests/Helpers/TextWriterOutputHelper.cs index 7d89a187a..b0c27f6fa 100644 --- a/tests/StackExchange.Redis.Tests/Helpers/TextWriterOutputHelper.cs +++ b/tests/StackExchange.Redis.Tests/Helpers/TextWriterOutputHelper.cs @@ -20,6 +20,20 @@ public TextWriterOutputHelper(ITestOutputHelper outputHelper, bool echoToConsole public void EchoTo(StringBuilder sb) => Echo = sb; + public void WriteLineNoTime(string value) + { + try + { + base.WriteLine(value); + } + catch (Exception ex) + { + Console.Write("Attempted to write: "); + Console.WriteLine(value); + Console.WriteLine(ex); + } + } + public override void WriteLine(string value) { try diff --git a/tests/StackExchange.Redis.Tests/TestBase.cs b/tests/StackExchange.Redis.Tests/TestBase.cs index 0cca4e5b7..dd04fa9da 100644 --- a/tests/StackExchange.Redis.Tests/TestBase.cs +++ b/tests/StackExchange.Redis.Tests/TestBase.cs @@ -8,6 +8,7 @@ using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; +using StackExchange.Redis.Profiling; using StackExchange.Redis.Tests.Helpers; using Xunit; using Xunit.Abstractions; @@ -76,6 +77,16 @@ protected void Log(string message, params object[] args) } } + protected ProfiledCommandEnumerable Log(ProfilingSession session) + { + var profile = session.FinishProfiling(); + foreach (var command in profile) + { + 
Writer.WriteLineNoTime(command.ToString()); + } + return profile; + } + protected void CollectGarbage() { GC.Collect(GC.MaxGeneration, GCCollectionMode.Forced); diff --git a/tests/StackExchange.Redis.Tests/TestExtensions.cs b/tests/StackExchange.Redis.Tests/TestExtensions.cs new file mode 100644 index 000000000..b4c9707fd --- /dev/null +++ b/tests/StackExchange.Redis.Tests/TestExtensions.cs @@ -0,0 +1,15 @@ +using System; +using StackExchange.Redis.Profiling; + +namespace StackExchange.Redis.Tests +{ + public static class TestExtensions + { + public static ProfilingSession AddProfiler(this IConnectionMultiplexer mutex) + { + var session = new ProfilingSession(); + mutex.RegisterProfiler(() => session); + return session; + } + } +} From 64565dd13a882be445ae39a85be6b5b30c5695cd Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 14:12:43 -0500 Subject: [PATCH 079/117] Tests: use profiling and add more logging --- tests/StackExchange.Redis.Tests/PubSub.cs | 49 ++++++++++------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 2ca92a430..0ba6a928c 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -1,13 +1,11 @@ using System; using System.Collections.Generic; using System.Diagnostics; -using System.Linq; using System.Text; using System.Threading; using System.Threading.Channels; using System.Threading.Tasks; using StackExchange.Redis.Maintenance; -using StackExchange.Redis.Profiling; using Xunit; using Xunit.Abstractions; // ReSharper disable AccessToModifiedClosure @@ -127,13 +125,14 @@ public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string br [Fact] public async Task TestBasicPubSubFireAndForget() { - using (var muxer = Create(log: Writer)) + using (var muxer = Create(log: Writer, shared: false)) { + var profiler = muxer.AddProfiler(); var pub = GetAnyMaster(muxer); 
var sub = muxer.GetSubscriber(); RedisChannel key = Me() + Guid.NewGuid(); - HashSet received = new HashSet(); + HashSet received = new(); int secondHandler = 0; await PingAsync(muxer, pub, sub).ForAwait(); sub.Subscribe(key, (channel, payload) => @@ -148,6 +147,7 @@ public async Task TestBasicPubSubFireAndForget() }, CommandFlags.FireAndForget); sub.Subscribe(key, (_, __) => Interlocked.Increment(ref secondHandler), CommandFlags.FireAndForget); + Log(profiler); lock (received) { @@ -159,6 +159,8 @@ public async Task TestBasicPubSubFireAndForget() await PingAsync(muxer, pub, sub).ForAwait(); await UntilCondition(TimeSpan.FromSeconds(5), () => received.Count == 1); + Log(profiler); + lock (received) { Assert.Single(received); @@ -169,7 +171,7 @@ public async Task TestBasicPubSubFireAndForget() count = sub.Publish(key, "ghi", CommandFlags.FireAndForget); await PingAsync(muxer, pub, sub).ForAwait(); - + Log(profiler); lock (received) { Assert.Single(received); @@ -178,27 +180,30 @@ public async Task TestBasicPubSubFireAndForget() } } - private static async Task PingAsync(IConnectionMultiplexer muxer, IServer pub, ISubscriber sub, int times = 1) + private async Task PingAsync(IConnectionMultiplexer muxer, IServer pub, ISubscriber sub, int times = 1) { while (times-- > 0) { // both use async because we want to drain the completion managers, and the only // way to prove that is to use TPL objects - var t1 = sub.PingAsync(); - var t2 = pub.PingAsync(); - await Task.WhenAll(t1, t2).ForAwait(); + var subTask = sub.PingAsync(); + var pubTask = pub.PingAsync(); + await Task.WhenAll(subTask, pubTask).ForAwait(); + + Log($"Sub PING time: {subTask.Result.TotalMilliseconds} ms"); + Log($"Pub PING time: {pubTask.Result.TotalMilliseconds} ms"); } } [Fact] public async Task TestPatternPubSub() { - using (var muxer = Create()) + using (var muxer = Create(shared: false)) { var pub = GetAnyMaster(muxer); var sub = muxer.GetSubscriber(); - HashSet received = new HashSet(); + HashSet 
received = new(); int secondHandler = 0; sub.Subscribe("a*c", (channel, payload) => { @@ -238,7 +243,6 @@ public async Task TestPatternPubSub() { Assert.Single(received); } - Assert.Equal(0, count); } } @@ -746,10 +750,9 @@ public async Task AzureRedisEventsAutomaticSubscribe() [Fact] public async Task SubscriptionsSurviveConnectionFailureAsync() { - var session = new ProfilingSession(); using (var muxer = Create(allowAdmin: true, shared: false, syncTimeout: 1000) as ConnectionMultiplexer) { - muxer.RegisterProfiler(() => session); + var profiler = muxer.AddProfiler(); RedisChannel channel = Me(); var sub = muxer.GetSubscriber(); int counter = 0; @@ -758,11 +761,7 @@ await sub.SubscribeAsync(channel, delegate Interlocked.Increment(ref counter); }).ConfigureAwait(false); - var profile1 = session.FinishProfiling(); - foreach (var command in profile1) - { - Log($"{command.EndPoint}: {command}"); - } + var profile1 = Log(profiler); // We shouldn't see the initial connection here Assert.Equal(0, profile1.Count(p => p.Command == nameof(RedisCommand.SUBSCRIBE))); @@ -775,7 +774,7 @@ await sub.SubscribeAsync(channel, delegate await Task.Delay(200).ConfigureAwait(false); var counter1 = Thread.VolatileRead(ref counter); - Log($"Expecting 1 messsage, got {counter1}"); + Log($"Expecting 1 message, got {counter1}"); Assert.Equal(1, counter1); var server = GetServer(muxer); @@ -808,11 +807,7 @@ await sub.SubscribeAsync(channel, delegate await Task.Delay(1000).ConfigureAwait(false); // Ensure we've sent the subscribe command after reconnecting - var profile2 = session.FinishProfiling(); - foreach (var command in profile2) - { - Log($"{command.EndPoint}: {command}"); - } + var profile2 = Log(profiler); //Assert.Equal(1, profile2.Count(p => p.Command == nameof(RedisCommand.SUBSCRIBE))); Log($"Issuing ping after reconnected"); @@ -830,12 +825,12 @@ await sub.SubscribeAsync(channel, delegate await UntilCondition(TimeSpan.FromSeconds(5), () => Thread.VolatileRead(ref counter) == 2); 
var counter2 = Thread.VolatileRead(ref counter); - Log($"Expecting 2 messsages, got {counter2}"); + Log($"Expecting 2 messages, got {counter2}"); Assert.Equal(2, counter2); // Log all commands at the end Log("All commands since connecting:"); - var profile3 = session.FinishProfiling(); + var profile3 = profiler.FinishProfiling(); foreach (var command in profile3) { Log($"{command.EndPoint}: {command}"); From 00f851c300f1a7c3d02e28c67688015c15356940 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 14:13:51 -0500 Subject: [PATCH 080/117] Pub/Sub: Register immediately, but complete async This moves the registration bits into the result processor so it happens at the right time and we can properly FireAndForget with things still behaving. --- src/StackExchange.Redis/Message.cs | 4 +- src/StackExchange.Redis/RedisSubscriber.cs | 440 +++++++++--------- src/StackExchange.Redis/ResultProcessor.cs | 13 +- src/StackExchange.Redis/ServerEndPoint.cs | 2 +- .../Issues/Issue1101.cs | 2 +- 5 files changed, 225 insertions(+), 236 deletions(-) diff --git a/src/StackExchange.Redis/Message.cs b/src/StackExchange.Redis/Message.cs index a23c5cf20..708e4600f 100644 --- a/src/StackExchange.Redis/Message.cs +++ b/src/StackExchange.Redis/Message.cs @@ -589,7 +589,7 @@ internal bool TrySetResult(T value) internal void SetEnqueued(PhysicalConnection connection) { SetWriteTime(); - performance?.SetEnqueued(); + performance?.SetEnqueued(connection?.BridgeCouldBeNull?.ConnectionType); _enqueuedTo = connection; if (connection == null) { @@ -735,6 +735,8 @@ protected CommandChannelBase(int db, CommandFlags flags, RedisCommand command, i } public override string CommandAndKey => Command + " " + Channel; + + public override int GetHashSlot(ServerSelectionStrategy serverSelectionStrategy) => serverSelectionStrategy.HashSlot(Channel); } internal abstract class CommandKeyBase : Message diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs 
index 36000e9ed..5b75ca9b1 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -4,61 +4,50 @@ using System.Threading; using System.Threading.Tasks; using Pipelines.Sockets.Unofficial; +using static StackExchange.Redis.ConnectionMultiplexer; namespace StackExchange.Redis { public partial class ConnectionMultiplexer { - private readonly SemaphoreSlim subscriptionsAddLock = new SemaphoreSlim(1, 1); + private RedisSubscriber _defaultSubscriber; + private RedisSubscriber DefaultSubscriber => _defaultSubscriber ??= new RedisSubscriber(this, null); + private readonly ConcurrentDictionary subscriptions = new(); + internal ConcurrentDictionary GetSubscriptions() => subscriptions; internal int GetSubscriptionsCount() => subscriptions.Count; - internal bool GetSubscriberCounts(in RedisChannel channel, out int handlers, out int queues) + internal Subscription GetOrAddSubscription(in RedisChannel channel, CommandFlags flags) { - if (subscriptions.TryGetValue(channel, out var sub)) + lock (subscriptions) { - sub.GetSubscriberCounts(out handlers, out queues); - return true; - } - handlers = queues = 0; - return false; - } - - internal bool AddSubscription(RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) - { - if (handler != null || queue != null) - { - if (!subscriptions.TryGetValue(channel, out Subscription sub)) + if (!subscriptions.TryGetValue(channel, out var sub)) { sub = new Subscription(flags); subscriptions.TryAdd(channel, sub); - if (!sub.SubscribeToServer(this, channel, flags, false)) - { - return false; - } } - sub.Add(handler, queue); + return sub; + } + } + internal bool TryGetSubscription(in RedisChannel channel, out Subscription sub) => subscriptions.TryGetValue(channel, out sub); + internal bool TryRemoveSubscription(in RedisChannel channel, out Subscription sub) + { + lock (subscriptions) + { + return subscriptions.TryRemove(channel, out sub); } - return true; } - internal 
async Task AddSubscriptionAsync(RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags, object asyncState) + internal bool GetSubscriberCounts(in RedisChannel channel, out int handlers, out int queues) { - if (handler != null || queue != null) + if (subscriptions.TryGetValue(channel, out var sub)) { - if (!subscriptions.TryGetValue(channel, out Subscription sub)) - { - sub = new Subscription(flags); - subscriptions.TryAdd(channel, sub); - if (!(await sub.SubscribeToServerAsync(this, channel, flags, asyncState, false))) - { - return false; - } - } - sub.Add(handler, queue); + sub.GetSubscriberCounts(out handlers, out queues); + return true; } - return true; + handlers = queues = 0; + return false; } internal ServerEndPoint GetSubscribedServer(in RedisChannel channel) @@ -88,66 +77,20 @@ internal void OnMessage(in RedisChannel subscription, in RedisChannel channel, i } } - internal Task RemoveAllSubscriptionsAsync(CommandFlags flags, object asyncState) + internal void EnsureSubscriptions(CommandFlags flags = CommandFlags.None) { - Task last = null; foreach (var pair in subscriptions) { - if (subscriptions.TryRemove(pair.Key, out var sub)) - { - pair.Value.MarkCompleted(); - last = pair.Value.UnsubscribeFromServerAsync(pair.Key, asyncState, false); - } - } - return last ?? CompletedTask.Default(asyncState); - } - - internal Task RemoveSubscriptionAsync(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags, object asyncState) - { - Task task = null; - if (subscriptions.TryGetValue(channel, out Subscription sub)) - { - bool removeChannel; - if (handler == null & queue == null) // blanket wipe - { - sub.MarkCompleted(); - removeChannel = true; - } - else - { - removeChannel = sub.Remove(handler, queue); - } - // If it was the last handler or a blanket wipe, remove it. 
- if (removeChannel) - { - subscriptions.TryRemove(channel, out _); - task = sub.UnsubscribeFromServerAsync(channel, asyncState, false); - } + DefaultSubscriber.EnsureSubscribedToServer(pair.Value, pair.Key, flags, true); } - return task ?? CompletedTask.Default(asyncState); } - internal void ResendSubscriptions(ServerEndPoint server) - { - if (server == null) return; - foreach (var pair in subscriptions) - { - pair.Value.Resubscribe(pair.Key, server); - } - } - - internal bool SubscriberConnected(in RedisChannel channel = default(RedisChannel)) - { - var server = GetSubscribedServer(channel) ?? SelectServer(RedisCommand.SUBSCRIBE, CommandFlags.DemandMaster, channel); - return server?.IsConnected == true && server.IsSubscriberConnected; - } - - internal async Task EnsureSubscriptionsAsync() + internal async Task EnsureSubscriptionsAsync(CommandFlags flags = CommandFlags.None) { long count = 0; foreach (var pair in subscriptions) { - if (await pair.Value.EnsureSubscribedAsync(this, pair.Key)) + if (await DefaultSubscriber.EnsureSubscribedToServerAsync(pair.Value, pair.Key, flags, true)) { count++; } @@ -155,18 +98,43 @@ internal async Task EnsureSubscriptionsAsync() return count; } + internal enum SubscriptionAction + { + Subscribe, + Unsubscribe + } + internal sealed class Subscription { private Action _handlers; private ChannelMessageQueue _queues; private ServerEndPoint CurrentServer; public CommandFlags Flags { get; } + public ResultProcessor.TrackSubscriptionsProcessor Processor { get; } - public Subscription(CommandFlags flags) => Flags = flags; + internal bool IsConnected => CurrentServer?.IsSubscriberConnected == true; + + public Subscription(CommandFlags flags) + { + Flags = flags; + Processor = new ResultProcessor.TrackSubscriptionsProcessor(this); + } - private Message GetMessage(RedisChannel channel, RedisCommand command, bool internalCall) + internal Message GetMessage(RedisChannel channel, SubscriptionAction action, CommandFlags flags, bool 
internalCall) { - var msg = Message.Create(-1, Flags, command, channel); + var isPattern = channel.IsPatternBased; + var command = action switch + { + SubscriptionAction.Subscribe when isPattern => RedisCommand.PSUBSCRIBE, + SubscriptionAction.Unsubscribe when isPattern => RedisCommand.PUNSUBSCRIBE, + + SubscriptionAction.Subscribe when !isPattern => RedisCommand.SUBSCRIBE, + SubscriptionAction.Unsubscribe when !isPattern => RedisCommand.UNSUBSCRIBE, + _ => throw new ArgumentOutOfRangeException("This would be an impressive boolean feat"), + }; + + // TODO: Consider flags here - we need to pass Fire and Forget, but don't want to intermingle Primary/Replica + var msg = Message.Create(-1, Flags | flags, command, channel); msg.SetForSubscriptionBridge(); if (internalCall) { @@ -175,6 +143,8 @@ private Message GetMessage(RedisChannel channel, RedisCommand command, bool inte return msg; } + internal void SetServer(ServerEndPoint server) => CurrentServer = server; + public void Add(Action handler, ChannelMessageQueue queue) { if (handler != null) @@ -213,118 +183,8 @@ public bool Remove(Action handler, ChannelMessageQueue return _handlers == null & _queues == null; } - public bool SubscribeToServer(ConnectionMultiplexer multiplexer, RedisChannel channel, CommandFlags flags, bool internalCall) - { - ServerEndPoint selected = null; - // Do we have a server already? And is it connected? Then bail out. - if (CurrentServer?.IsSubscriberConnected == true) - { - return false; - } - // Otherwise try and subscribe on the server side - try - { - var command = channel.IsPatternBased ? 
RedisCommand.PSUBSCRIBE : RedisCommand.SUBSCRIBE; - selected = multiplexer.SelectServer(command, flags, channel); - - var message = GetMessage(channel, command, internalCall); - return multiplexer.ExecuteSyncImpl(message, ResultProcessor.TrackSubscriptions, selected); - } - catch - { - // If there was an exception, clear the owner - Interlocked.CompareExchange(ref CurrentServer, null, selected); - throw; - } - } - - public async Task SubscribeToServerAsync(ConnectionMultiplexer multiplexer, RedisChannel channel, CommandFlags flags, object asyncState, bool internalCall) - { - ServerEndPoint selected = null; - - // Do we have a server already? And is it connected? Then bail out. - if (CurrentServer?.IsSubscriberConnected == true) - { - return false; - } - // Otherwise try and subscribe on the server side - try - { - var command = channel.IsPatternBased ? RedisCommand.PSUBSCRIBE : RedisCommand.SUBSCRIBE; - selected = multiplexer.SelectServer(command, flags, channel); - - var source = TaskResultBox.Create(out var taskSource, asyncState); - var message = GetMessage(channel, command, internalCall); - message.SetSource(ResultProcessor.TrackSubscriptions, source); - - // TODO: Could move this entirely into a processor, e.g. the CurrentServer removal we need below - var success = await multiplexer.ExecuteAsyncImpl(message, ResultProcessor.TrackSubscriptions, asyncState, selected); - if (!success) - { - taskSource.SetResult(false); - } - return await taskSource.Task; - } - catch - { - // If there was an exception, clear the owner - Interlocked.CompareExchange(ref CurrentServer, null, selected); - throw; - } - } - - public async Task UnsubscribeFromServerAsync(RedisChannel channel, object asyncState, bool internalCall) - { - var command = channel.IsPatternBased ? 
RedisCommand.PUNSUBSCRIBE : RedisCommand.UNSUBSCRIBE; - var oldOwner = Interlocked.Exchange(ref CurrentServer, null); - if (oldOwner != null) - { - var source = TaskResultBox.Create(out var taskSource, asyncState); - var message = GetMessage(channel, command, internalCall); - message.SetSource(ResultProcessor.TrackSubscriptions, source); - - var success = await oldOwner.Multiplexer.ExecuteAsyncImpl(message, ResultProcessor.TrackSubscriptions, asyncState, oldOwner); - if (!success) - { - taskSource.SetCanceled(); - } - return await taskSource.Task; - } - return false; - } - internal ServerEndPoint GetCurrentServer() => Volatile.Read(ref CurrentServer); - internal void Resubscribe(in RedisChannel channel, ServerEndPoint server) - { - // Only re-subscribe to the original server - if (server != null && GetCurrentServer() == server) - { - var cmd = channel.IsPatternBased ? RedisCommand.PSUBSCRIBE : RedisCommand.SUBSCRIBE; - var msg = Message.Create(-1, CommandFlags.FireAndForget, cmd, channel); - msg.SetInternalCall(); - server.Multiplexer.ExecuteSyncImpl(msg, ResultProcessor.TrackSubscriptions, server); - } - } - - internal async ValueTask EnsureSubscribedAsync(ConnectionMultiplexer multiplexer, RedisChannel channel) - { - bool changed = false; - var oldOwner = Volatile.Read(ref CurrentServer); - // If the old server is bad, unsubscribe - if (oldOwner != null && !oldOwner.IsSelectable(RedisCommand.PSUBSCRIBE)) - { - changed = await UnsubscribeFromServerAsync(channel, null, true); - oldOwner = null; - } - // If we didn't have an owner or just cleared one, subscribe - if (oldOwner == null) - { - changed = await SubscribeToServerAsync(multiplexer, channel, CommandFlags.FireAndForget, null, true); - } - return changed; - } - internal void GetSubscriberCounts(out int handlers, out int queues) { queues = ChannelMessageQueue.Count(ref _queues); @@ -366,7 +226,11 @@ public Task IdentifyEndpointAsync(RedisChannel channel, CommandFlags f return ExecuteAsync(msg, 
ResultProcessor.ConnectionIdentity); } - public bool IsConnected(RedisChannel channel = default(RedisChannel)) => multiplexer.SubscriberConnected(channel); + public bool IsConnected(RedisChannel channel = default(RedisChannel)) + { + var server = multiplexer.GetSubscribedServer(channel) ?? multiplexer.SelectServer(RedisCommand.SUBSCRIBE, CommandFlags.DemandMaster, channel); + return server?.IsConnected == true && server.IsSubscriberConnected; + } public override TimeSpan Ping(CommandFlags flags = CommandFlags.None) { @@ -405,22 +269,24 @@ private Message CreatePingMessage(CommandFlags flags) return msg; } - public long Publish(RedisChannel channel, RedisValue message, CommandFlags flags = CommandFlags.None) + private void ThrowIfNull(in RedisChannel channel) { if (channel.IsNullOrEmpty) { throw new ArgumentNullException(nameof(channel)); } + } + + public long Publish(RedisChannel channel, RedisValue message, CommandFlags flags = CommandFlags.None) + { + ThrowIfNull(channel); var msg = Message.Create(-1, flags, RedisCommand.PUBLISH, channel, message); return ExecuteSync(msg, ResultProcessor.Int64); } public Task PublishAsync(RedisChannel channel, RedisValue message, CommandFlags flags = CommandFlags.None) { - if (channel.IsNullOrEmpty) - { - throw new ArgumentNullException(nameof(channel)); - } + ThrowIfNull(channel); var msg = Message.Create(-1, flags, RedisCommand.PUBLISH, channel, message); return ExecuteAsync(msg, ResultProcessor.Int64); } @@ -428,15 +294,6 @@ public Task PublishAsync(RedisChannel channel, RedisValue message, Command void ISubscriber.Subscribe(RedisChannel channel, Action handler, CommandFlags flags) => Subscribe(channel, handler, null, flags); - public void Subscribe(RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) - { - if (channel.IsNullOrEmpty) - { - throw new ArgumentNullException(nameof(channel)); - } - multiplexer.AddSubscription(channel, handler, queue, flags); - } - public ChannelMessageQueue 
Subscribe(RedisChannel channel, CommandFlags flags = CommandFlags.None) { var queue = new ChannelMessageQueue(channel, this); @@ -444,20 +301,37 @@ public ChannelMessageQueue Subscribe(RedisChannel channel, CommandFlags flags = return queue; } - Task ISubscriber.SubscribeAsync(RedisChannel channel, Action handler, CommandFlags flags) - => SubscribeAsync(channel, handler, null, flags); + public bool Subscribe(RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) + { + ThrowIfNull(channel); + if (handler == null && queue == null) { return true; } - public Task SubscribeAsync(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) + var sub = multiplexer.GetOrAddSubscription(channel, flags); + sub.Add(handler, queue); + return EnsureSubscribedToServer(sub, channel, flags, false); + } + + internal bool EnsureSubscribedToServer(Subscription sub, RedisChannel channel, CommandFlags flags, bool internalCall) { - if (channel.IsNullOrEmpty) + if (sub.IsConnected) { return true; } + + // TODO: Cleanup old hangers here? 
+ + try { - throw new ArgumentNullException(nameof(channel)); + var message = sub.GetMessage(channel, SubscriptionAction.Subscribe, flags, internalCall); + var selected = multiplexer.SelectServer(message); + return multiplexer.ExecuteSyncImpl(message, sub.Processor, selected); + } + catch + { + sub.SetServer(null); // If there was an exception, clear the owner + throw; } - return multiplexer.AddSubscriptionAsync(channel, handler, queue, flags, asyncState); } - internal bool GetSubscriberCounts(in RedisChannel channel, out int handlers, out int queues) - => multiplexer.GetSubscriberCounts(channel, out handlers, out queues); + Task ISubscriber.SubscribeAsync(RedisChannel channel, Action handler, CommandFlags flags) + => SubscribeAsync(channel, handler, null, flags); public async Task SubscribeAsync(RedisChannel channel, CommandFlags flags = CommandFlags.None) { @@ -466,35 +340,137 @@ public async Task SubscribeAsync(RedisChannel channel, Comm return queue; } + public Task SubscribeAsync(RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) + { + ThrowIfNull(channel); + if (handler == null && queue == null) { return CompletedTask.Default(null); } + + var sub = multiplexer.GetOrAddSubscription(channel, flags); + sub.Add(handler, queue); + return EnsureSubscribedToServerAsync(sub, channel, flags, false); + } + + public async Task EnsureSubscribedToServerAsync(Subscription sub, RedisChannel channel, CommandFlags flags, bool internalCall) + { + if (sub.IsConnected) { return false; } + + // TODO: Cleanup old hangers here? 
+ + try + { + var message = sub.GetMessage(channel, SubscriptionAction.Subscribe, flags, internalCall); + var selected = multiplexer.SelectServer(message); + return await ExecuteAsync(message, sub.Processor, selected); + } + catch + { + // If there was an exception, clear the owner + sub.SetServer(null); + throw; + } + } + public EndPoint SubscribedEndpoint(RedisChannel channel) => multiplexer.GetSubscribedServer(channel)?.EndPoint; void ISubscriber.Unsubscribe(RedisChannel channel, Action handler, CommandFlags flags) => Unsubscribe(channel, handler, null, flags); - public void Unsubscribe(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) + public bool Unsubscribe(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) { - var task = UnsubscribeAsync(channel, handler, queue, flags); - if ((flags & CommandFlags.FireAndForget) == 0) Wait(task); + ThrowIfNull(channel); + return UnregisterSubscription(channel, handler, queue, out var sub) + ? 
UnsubscribeFromServer(sub, channel, flags, false) + : true; } - public void UnsubscribeAll(CommandFlags flags = CommandFlags.None) + private bool UnsubscribeFromServer(Subscription sub, RedisChannel channel, CommandFlags flags, bool internalCall) { - var task = UnsubscribeAllAsync(flags); - if ((flags & CommandFlags.FireAndForget) == 0) Wait(task); + if (sub.GetCurrentServer() is ServerEndPoint oldOwner) + { + var message = sub.GetMessage(channel, SubscriptionAction.Unsubscribe, flags, internalCall); + return multiplexer.ExecuteSyncImpl(message, sub.Processor, oldOwner); + } + return false; } - public Task UnsubscribeAllAsync(CommandFlags flags = CommandFlags.None) => multiplexer.RemoveAllSubscriptionsAsync(flags, asyncState); - Task ISubscriber.UnsubscribeAsync(RedisChannel channel, Action handler, CommandFlags flags) => UnsubscribeAsync(channel, handler, null, flags); - public Task UnsubscribeAsync(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) + public Task UnsubscribeAsync(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) { - if (channel.IsNullOrEmpty) + ThrowIfNull(channel); + return UnregisterSubscription(channel, handler, queue, out var sub) + ? UnsubscribeFromServerAsync(sub, channel, flags, asyncState, false) + : CompletedTask.Default(asyncState); + } + + private Task UnsubscribeFromServerAsync(Subscription sub, RedisChannel channel, CommandFlags flags, object asyncState, bool internalCall) + { + if (sub.GetCurrentServer() is ServerEndPoint oldOwner) { - throw new ArgumentNullException(nameof(channel)); + var message = sub.GetMessage(channel, SubscriptionAction.Unsubscribe, flags, internalCall); + return multiplexer.ExecuteAsyncImpl(message, sub.Processor, asyncState, oldOwner); + } + return CompletedTask.FromResult(true, asyncState); + } + + /// + /// Unregisters a handler or queue and returns if we should remove it from the server. 
+ /// + /// True if we should remove the subscription from the server, false otherwise. + private bool UnregisterSubscription(in RedisChannel channel, Action handler, ChannelMessageQueue queue, out Subscription sub) + { + ThrowIfNull(channel); + if (multiplexer.TryGetSubscription(channel, out sub)) + { + bool shouldRemoveSubscriptionFromServer = false; + if (handler == null & queue == null) // blanket wipe + { + sub.MarkCompleted(); + shouldRemoveSubscriptionFromServer = true; + } + else + { + shouldRemoveSubscriptionFromServer = sub.Remove(handler, queue); + } + // If it was the last handler or a blanket wipe, remove it. + if (shouldRemoveSubscriptionFromServer) + { + multiplexer.TryRemoveSubscription(channel, out _); + return true; + } + } + return false; + } + + public void UnsubscribeAll(CommandFlags flags = CommandFlags.None) + { + // TODO: Unsubscribe multi key command to reduce round trips + var subs = multiplexer.GetSubscriptions(); + foreach (var pair in subs) + { + if (subs.TryRemove(pair.Key, out var sub)) + { + sub.MarkCompleted(); + UnsubscribeFromServer(sub, pair.Key, flags, false); + } + } + } + + public Task UnsubscribeAllAsync(CommandFlags flags = CommandFlags.None) + { + // TODO: Unsubscribe multi key command to reduce round trips + Task last = null; + var subs = multiplexer.GetSubscriptions(); + foreach (var pair in subs) + { + if (subs.TryRemove(pair.Key, out var sub)) + { + sub.MarkCompleted(); + last = UnsubscribeFromServerAsync(sub, pair.Key, flags, asyncState, false); + } } - return multiplexer.RemoveSubscriptionAsync(channel, handler, queue, flags, asyncState); + return last ?? 
CompletedTask.Default(asyncState); } } } diff --git a/src/StackExchange.Redis/ResultProcessor.cs b/src/StackExchange.Redis/ResultProcessor.cs index 0fc523316..ea56a02a4 100644 --- a/src/StackExchange.Redis/ResultProcessor.cs +++ b/src/StackExchange.Redis/ResultProcessor.cs @@ -18,7 +18,7 @@ public static readonly ResultProcessor DemandPONG = new ExpectBasicStringProcessor(CommonReplies.PONG), DemandZeroOrOne = new DemandZeroOrOneProcessor(), AutoConfigure = new AutoConfigureProcessor(), - TrackSubscriptions = new TrackSubscriptionsProcessor(), + TrackSubscriptions = new TrackSubscriptionsProcessor(null), Tracer = new TracerProcessor(false), EstablishConnection = new TracerProcessor(true), BackgroundSaveStarted = new ExpectBasicStringProcessor(CommonReplies.backgroundSavingStarted_trimmed, startsWith: true); @@ -392,6 +392,9 @@ protected override void WriteImpl(PhysicalConnection physical) public sealed class TrackSubscriptionsProcessor : ResultProcessor { + private ConnectionMultiplexer.Subscription Subscription { get; } + public TrackSubscriptionsProcessor(ConnectionMultiplexer.Subscription sub) => Subscription = sub; + protected override bool SetResultCore(PhysicalConnection connection, Message message, in RawResult result) { if (result.Type == ResultType.MultiBulk) @@ -400,6 +403,14 @@ protected override bool SetResultCore(PhysicalConnection connection, Message mes if (items.Length >= 3 && items[2].TryGetInt64(out long count)) { connection.SubscriptionCount = count; + SetResult(message, true); + + var newServer = message.Command switch + { + RedisCommand.SUBSCRIBE or RedisCommand.PSUBSCRIBE => connection.BridgeCouldBeNull?.ServerEndPoint, + _ => null + }; + Subscription?.SetServer(newServer); return true; } } diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index ab2889e65..d19f2d4eb 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -615,7 +615,7 @@ internal 
void OnFullyEstablished(PhysicalConnection connection, string source) { if (bridge == subscription) { - Multiplexer.ResendSubscriptions(this); + Multiplexer.EnsureSubscriptions(); } else if (bridge == interactive) { diff --git a/tests/StackExchange.Redis.Tests/Issues/Issue1101.cs b/tests/StackExchange.Redis.Tests/Issues/Issue1101.cs index aebf76648..5812dfccc 100644 --- a/tests/StackExchange.Redis.Tests/Issues/Issue1101.cs +++ b/tests/StackExchange.Redis.Tests/Issues/Issue1101.cs @@ -15,7 +15,7 @@ public Issue1101(ITestOutputHelper output) : base(output) { } private static void AssertCounts(ISubscriber pubsub, in RedisChannel channel, bool has, int handlers, int queues) { - var aHas = ((RedisSubscriber)pubsub).GetSubscriberCounts(channel, out var ah, out var aq); + var aHas = (pubsub.Multiplexer as ConnectionMultiplexer).GetSubscriberCounts(channel, out var ah, out var aq); Assert.Equal(has, aHas); Assert.Equal(handlers, ah); Assert.Equal(queues, aq); From 029c2a35591f1ce13f3673246530b09de8807343 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 14:19:52 -0500 Subject: [PATCH 081/117] Pre-clear rather than await --- src/StackExchange.Redis/RedisSubscriber.cs | 35 +++++++--------------- tests/StackExchange.Redis.Tests/PubSub.cs | 2 -- 2 files changed, 10 insertions(+), 27 deletions(-) diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs index 5b75ca9b1..6885b28f9 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -317,17 +317,10 @@ internal bool EnsureSubscribedToServer(Subscription sub, RedisChannel channel, C // TODO: Cleanup old hangers here? 
- try - { - var message = sub.GetMessage(channel, SubscriptionAction.Subscribe, flags, internalCall); - var selected = multiplexer.SelectServer(message); - return multiplexer.ExecuteSyncImpl(message, sub.Processor, selected); - } - catch - { - sub.SetServer(null); // If there was an exception, clear the owner - throw; - } + sub.SetServer(null); // we're not appropriately connected, so blank it out for eligible reconnection + var message = sub.GetMessage(channel, SubscriptionAction.Subscribe, flags, internalCall); + var selected = multiplexer.SelectServer(message); + return multiplexer.ExecuteSyncImpl(message, sub.Processor, selected); } Task ISubscriber.SubscribeAsync(RedisChannel channel, Action handler, CommandFlags flags) @@ -350,24 +343,16 @@ public Task SubscribeAsync(RedisChannel channel, Action EnsureSubscribedToServerAsync(Subscription sub, RedisChannel channel, CommandFlags flags, bool internalCall) + public Task EnsureSubscribedToServerAsync(Subscription sub, RedisChannel channel, CommandFlags flags, bool internalCall) { - if (sub.IsConnected) { return false; } + if (sub.IsConnected) { return CompletedTask.Default(null); } // TODO: Cleanup old hangers here? 
- try - { - var message = sub.GetMessage(channel, SubscriptionAction.Subscribe, flags, internalCall); - var selected = multiplexer.SelectServer(message); - return await ExecuteAsync(message, sub.Processor, selected); - } - catch - { - // If there was an exception, clear the owner - sub.SetServer(null); - throw; - } + sub.SetServer(null); // we're not appropriately connected, so blank it out for eligible reconnection + var message = sub.GetMessage(channel, SubscriptionAction.Subscribe, flags, internalCall); + var selected = multiplexer.SelectServer(message); + return ExecuteAsync(message, sub.Processor, selected); } public EndPoint SubscribedEndpoint(RedisChannel channel) => multiplexer.GetSubscribedServer(channel)?.EndPoint; diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 0ba6a928c..10d4d8760 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -762,8 +762,6 @@ await sub.SubscribeAsync(channel, delegate }).ConfigureAwait(false); var profile1 = Log(profiler); - // We shouldn't see the initial connection here - Assert.Equal(0, profile1.Count(p => p.Command == nameof(RedisCommand.SUBSCRIBE))); Assert.Equal(1, muxer.GetSubscriptionsCount()); From 341f53299b88e6b9e43bf28e67ab27890eb25687 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 15:04:07 -0500 Subject: [PATCH 082/117] Fix more things --- src/StackExchange.Redis/ResultProcessor.cs | 1 + src/StackExchange.Redis/ServerEndPoint.cs | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/StackExchange.Redis/ResultProcessor.cs b/src/StackExchange.Redis/ResultProcessor.cs index ea56a02a4..79fb5a7a6 100644 --- a/src/StackExchange.Redis/ResultProcessor.cs +++ b/src/StackExchange.Redis/ResultProcessor.cs @@ -414,6 +414,7 @@ protected override bool SetResultCore(PhysicalConnection connection, Message mes return true; } } + SetResult(message, false); return false; } } diff --git 
a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index d19f2d4eb..902368800 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -615,7 +615,10 @@ internal void OnFullyEstablished(PhysicalConnection connection, string source) { if (bridge == subscription) { - Multiplexer.EnsureSubscriptions(); + // Note: this MUST be fire and forget, because we might be in the middle of a Sync processing + // TracerProcessor which is executing this line inside a SetResultCore(). + // Since we're issuing commands inside a SetResult path in a message, we'd create a deadlock by waiting. + Multiplexer.EnsureSubscriptions(CommandFlags.FireAndForget); } else if (bridge == interactive) { From 5dbf57558dc7a0779132f42337a3fec4912e0314 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 15:09:43 -0500 Subject: [PATCH 083/117] Add logging to TestPublishWithSubscribers --- tests/StackExchange.Redis.Tests/PubSub.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 10d4d8760..e2ea9b157 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -568,8 +568,8 @@ await Assert.ThrowsAsync(async delegate public async Task TestPublishWithSubscribers() { var channel = Me(); - using (var muxerA = Create(shared: false)) - using (var muxerB = Create(shared: false)) + using (var muxerA = Create(shared: false, log: Writer)) + using (var muxerB = Create(shared: false, log: Writer)) using (var conn = Create()) { var listenA = muxerA.GetSubscriber(); From ab41493090818c89c27c4fd91a979ee3e29973b7 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 15:39:27 -0500 Subject: [PATCH 084/117] Meeeeeeerge fun --- src/StackExchange.Redis/Message.cs | 2 +- .../ServerSelectionStrategy.cs | 4 ++-- .../StackExchange.Redis.Tests/BacklogTests.cs | 24 
+++++++++---------- tests/StackExchange.Redis.Tests/PubSub.cs | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/StackExchange.Redis/Message.cs b/src/StackExchange.Redis/Message.cs index ba47b8d53..576d9cb31 100644 --- a/src/StackExchange.Redis/Message.cs +++ b/src/StackExchange.Redis/Message.cs @@ -58,7 +58,7 @@ internal abstract class Message : ICompletable private const CommandFlags AskingFlag = (CommandFlags)32, ScriptUnavailableFlag = (CommandFlags)256, - DemandSubscriptionConnection = (CommandFlags)2048; + DemandSubscriptionConnection = (CommandFlags)1024; private const CommandFlags MaskMasterServerPreference = CommandFlags.DemandMaster | CommandFlags.DemandReplica diff --git a/src/StackExchange.Redis/ServerSelectionStrategy.cs b/src/StackExchange.Redis/ServerSelectionStrategy.cs index f6a61d2a7..ba7b40547 100644 --- a/src/StackExchange.Redis/ServerSelectionStrategy.cs +++ b/src/StackExchange.Redis/ServerSelectionStrategy.cs @@ -123,10 +123,10 @@ public ServerEndPoint Select(RedisCommand command, in RedisKey key, CommandFlags return Select(slot, command, flags, allowDisconnected); } - public ServerEndPoint Select(RedisCommand command, in RedisChannel channel, CommandFlags flags) + public ServerEndPoint Select(RedisCommand command, in RedisChannel channel, CommandFlags flags, bool allowDisconnected = false) { int slot = ServerType == ServerType.Cluster ? 
HashSlot(channel) : NoSlot; - return Select(slot, command, flags); + return Select(slot, command, flags, allowDisconnected); } public bool TryResend(int hashSlot, Message message, EndPoint endpoint, bool isMoved) diff --git a/tests/StackExchange.Redis.Tests/BacklogTests.cs b/tests/StackExchange.Redis.Tests/BacklogTests.cs index 9ff6a2cf3..ec70b0f81 100644 --- a/tests/StackExchange.Redis.Tests/BacklogTests.cs +++ b/tests/StackExchange.Redis.Tests/BacklogTests.cs @@ -60,7 +60,7 @@ void PrintSnapshot(ConnectionMultiplexer muxer) await db.PingAsync(); var server = muxer.GetServerSnapshot()[0]; - var stats = server.GetBridgeStatus(RedisCommand.PING); + var stats = server.GetBridgeStatus(ConnectionType.Interactive); Assert.Equal(0, stats.BacklogMessagesPending); // Everything's normal // Fail the connection @@ -73,7 +73,7 @@ void PrintSnapshot(ConnectionMultiplexer muxer) Writer.WriteLine("Test: Disconnected pings"); await Assert.ThrowsAsync(() => db.PingAsync()); - var disconnectedStats = server.GetBridgeStatus(RedisCommand.PING); + var disconnectedStats = server.GetBridgeStatus(ConnectionType.Interactive); Assert.False(muxer.IsConnected); Assert.Equal(0, disconnectedStats.BacklogMessagesPending); @@ -85,7 +85,7 @@ void PrintSnapshot(ConnectionMultiplexer muxer) Writer.WriteLine("Test: Reconnecting"); Assert.True(muxer.IsConnected); Assert.True(server.IsConnected); - var reconnectedStats = server.GetBridgeStatus(RedisCommand.PING); + var reconnectedStats = server.GetBridgeStatus(ConnectionType.Interactive); Assert.Equal(0, reconnectedStats.BacklogMessagesPending); _ = db.PingAsync(); @@ -137,7 +137,7 @@ public async Task QueuesAndFlushesAfterReconnectingAsync() await db.PingAsync(); var server = muxer.GetServerSnapshot()[0]; - var stats = server.GetBridgeStatus(RedisCommand.PING); + var stats = server.GetBridgeStatus(ConnectionType.Interactive); Assert.Equal(0, stats.BacklogMessagesPending); // Everything's normal // Fail the connection @@ -154,7 +154,7 @@ public async 
Task QueuesAndFlushesAfterReconnectingAsync() // TODO: Add specific server call - var disconnectedStats = server.GetBridgeStatus(RedisCommand.PING); + var disconnectedStats = server.GetBridgeStatus(ConnectionType.Interactive); Assert.False(muxer.IsConnected); Assert.True(disconnectedStats.BacklogMessagesPending >= 3, $"Expected {nameof(disconnectedStats.BacklogMessagesPending)} > 3, got {disconnectedStats.BacklogMessagesPending}"); @@ -169,7 +169,7 @@ public async Task QueuesAndFlushesAfterReconnectingAsync() Writer.WriteLine("Test: ignoredA Status: " + ignoredA.Status); Writer.WriteLine("Test: ignoredB Status: " + ignoredB.Status); Writer.WriteLine("Test: lastPing Status: " + lastPing.Status); - var afterConnectedStats = server.GetBridgeStatus(RedisCommand.PING); + var afterConnectedStats = server.GetBridgeStatus(ConnectionType.Interactive); Writer.WriteLine($"Test: BacklogStatus: {afterConnectedStats.BacklogStatus}, BacklogMessagesPending: {afterConnectedStats.BacklogMessagesPending}, IsWriterActive: {afterConnectedStats.IsWriterActive}, MessagesSinceLastHeartbeat: {afterConnectedStats.MessagesSinceLastHeartbeat}, TotalBacklogMessagesQueued: {afterConnectedStats.TotalBacklogMessagesQueued}"); Writer.WriteLine("Test: Awaiting lastPing 1"); @@ -177,7 +177,7 @@ public async Task QueuesAndFlushesAfterReconnectingAsync() Writer.WriteLine("Test: Checking reconnected 2"); Assert.True(muxer.IsConnected); - var reconnectedStats = server.GetBridgeStatus(RedisCommand.PING); + var reconnectedStats = server.GetBridgeStatus(ConnectionType.Interactive); Assert.Equal(0, reconnectedStats.BacklogMessagesPending); Writer.WriteLine("Test: Pinging again..."); @@ -230,7 +230,7 @@ public async Task QueuesAndFlushesAfterReconnecting() await db.PingAsync(); var server = muxer.GetServerSnapshot()[0]; - var stats = server.GetBridgeStatus(RedisCommand.PING); + var stats = server.GetBridgeStatus(ConnectionType.Interactive); Assert.Equal(0, stats.BacklogMessagesPending); // Everything's 
normal // Fail the connection @@ -257,9 +257,9 @@ void disconnectedPings(int id) Assert.False(muxer.IsConnected); // Give the tasks time to queue - await UntilCondition(TimeSpan.FromSeconds(5), () => server.GetBridgeStatus(RedisCommand.PING).BacklogMessagesPending >= 3); + await UntilCondition(TimeSpan.FromSeconds(5), () => server.GetBridgeStatus(ConnectionType.Interactive).BacklogMessagesPending >= 3); - var disconnectedStats = server.GetBridgeStatus(RedisCommand.PING); + var disconnectedStats = server.GetBridgeStatus(ConnectionType.Interactive); Log($"Test Stats: (BacklogMessagesPending: {disconnectedStats.BacklogMessagesPending}, TotalBacklogMessagesQueued: {disconnectedStats.TotalBacklogMessagesQueued})"); Assert.True(disconnectedStats.BacklogMessagesPending >= 3, $"Expected {nameof(disconnectedStats.BacklogMessagesPending)} > 3, got {disconnectedStats.BacklogMessagesPending}"); @@ -271,7 +271,7 @@ void disconnectedPings(int id) Writer.WriteLine("Test: Checking reconnected 1"); Assert.True(muxer.IsConnected); - var afterConnectedStats = server.GetBridgeStatus(RedisCommand.PING); + var afterConnectedStats = server.GetBridgeStatus(ConnectionType.Interactive); Writer.WriteLine($"Test: BacklogStatus: {afterConnectedStats.BacklogStatus}, BacklogMessagesPending: {afterConnectedStats.BacklogMessagesPending}, IsWriterActive: {afterConnectedStats.IsWriterActive}, MessagesSinceLastHeartbeat: {afterConnectedStats.MessagesSinceLastHeartbeat}, TotalBacklogMessagesQueued: {afterConnectedStats.TotalBacklogMessagesQueued}"); Writer.WriteLine("Test: Awaiting 3 pings"); @@ -279,7 +279,7 @@ void disconnectedPings(int id) Writer.WriteLine("Test: Checking reconnected 2"); Assert.True(muxer.IsConnected); - var reconnectedStats = server.GetBridgeStatus(RedisCommand.PING); + var reconnectedStats = server.GetBridgeStatus(ConnectionType.Interactive); Assert.Equal(0, reconnectedStats.BacklogMessagesPending); Writer.WriteLine("Test: Pinging again..."); diff --git 
a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 945f1a947..8aabda958 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -793,8 +793,8 @@ await sub.SubscribeAsync(channel, delegate Log("Failing connection"); // Fail all connections server.SimulateConnectionFailure(SimulatedFailureType.All); - // Trigger failure - Assert.Throws(() => sub.Ping()); + // Trigger failure (RedisTimeoutException because of backlog behavior) + Assert.Throws(() => sub.Ping()); Assert.False(sub.IsConnected(channel)); // Now reconnect... From 51c927b87ce93a1118e81f0cd91adac20eb902e7 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 19:06:54 -0500 Subject: [PATCH 085/117] Light up more pubsub tests --- tests/StackExchange.Redis.Tests/PubSub.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index e2ea9b157..0f7ff0b78 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -256,7 +256,7 @@ public void TestPublishWithNoSubscribers() } } - [FactLongRunning] + [Fact] public void TestMassivePublishWithWithoutFlush_Local() { using (var muxer = Create()) @@ -304,7 +304,7 @@ private void TestMassivePublish(ISubscriber conn, string channel, string caption Assert.True(withFAF.ElapsedMilliseconds < withAsync.ElapsedMilliseconds + 3000, caption); } - [FactLongRunning] + [Fact] public async Task PubSubGetAllAnyOrder() { using (var muxer = Create(syncTimeout: 20000)) From 20d2d281c493ffc5f3824570fc269081f5979f48 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 19:46:27 -0500 Subject: [PATCH 086/117] Several subtle but big changes 1. Wait for *both* interactive and subscription connections to complete when connecting 2. Instantly mark subscriptions as having no server on every disconnect 3. 
Adds some more test/diagnostic logging for the tests around this --- src/StackExchange.Redis/RedisSubscriber.cs | 21 ++++++++++++++++--- src/StackExchange.Redis/ServerEndPoint.cs | 7 ++++++- .../ServerSelectionStrategy.cs | 2 +- tests/StackExchange.Redis.Tests/PubSub.cs | 17 ++++++++++----- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs index 6885b28f9..152a0a492 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -85,6 +85,14 @@ internal void EnsureSubscriptions(CommandFlags flags = CommandFlags.None) } } + internal void UpdateSubscriptions() + { + foreach (var pair in subscriptions) + { + pair.Value.UpdateServer(); + } + } + internal async Task EnsureSubscriptionsAsync(CommandFlags flags = CommandFlags.None) { long count = 0; @@ -182,9 +190,6 @@ public bool Remove(Action handler, ChannelMessageQueue } return _handlers == null & _queues == null; } - - internal ServerEndPoint GetCurrentServer() => Volatile.Read(ref CurrentServer); - internal void GetSubscriberCounts(out int handlers, out int queues) { queues = ChannelMessageQueue.Count(ref _queues); @@ -203,6 +208,16 @@ internal void GetSubscriberCounts(out int handlers, out int queues) foreach (var sub in tmp.AsEnumerable()) { handlers++; } } } + + internal ServerEndPoint GetCurrentServer() => Volatile.Read(ref CurrentServer); + + internal void UpdateServer() + { + if (!IsConnected) + { + SetServer(null); + } + } } } diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index 902368800..18f68f36b 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -575,6 +575,10 @@ internal void OnDisconnected(PhysicalBridge bridge) { CompletePendingConnectionMonitors("Disconnected"); } + else if (bridge == subscription) + { + Multiplexer.UpdateSubscriptions(); + } } internal 
Task OnEstablishingAsync(PhysicalConnection connection, LogProxy log) @@ -620,8 +624,9 @@ internal void OnFullyEstablished(PhysicalConnection connection, string source) // Since we're issuing commands inside a SetResult path in a message, we'd create a deadlock by waiting. Multiplexer.EnsureSubscriptions(CommandFlags.FireAndForget); } - else if (bridge == interactive) + if (IsConnected && IsSubscriberConnected) { + // Only connect on the second leg - we can accomplish this by checking both CompletePendingConnectionMonitors(source); } diff --git a/src/StackExchange.Redis/ServerSelectionStrategy.cs b/src/StackExchange.Redis/ServerSelectionStrategy.cs index cb4bb954c..7578936f2 100644 --- a/src/StackExchange.Redis/ServerSelectionStrategy.cs +++ b/src/StackExchange.Redis/ServerSelectionStrategy.cs @@ -288,7 +288,7 @@ private ServerEndPoint[] MapForMutation() private ServerEndPoint Select(int slot, RedisCommand command, CommandFlags flags) { - flags = Message.GetMasterReplicaFlags(flags); // only intersted in master/replica preferences + flags = Message.GetMasterReplicaFlags(flags); // only interested in master/replica preferences ServerEndPoint[] arr; if (slot == NoSlot || (arr = map) == null) return Any(command, flags); diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 0f7ff0b78..817c130a1 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -801,16 +801,23 @@ await sub.SubscribeAsync(channel, delegate // Ensure we're reconnected Assert.True(sub.IsConnected(channel)); - // And time to resubscribe... 
- await Task.Delay(1000).ConfigureAwait(false); - // Ensure we've sent the subscribe command after reconnecting var profile2 = Log(profiler); //Assert.Equal(1, profile2.Count(p => p.Command == nameof(RedisCommand.SUBSCRIBE))); - Log($"Issuing ping after reconnected"); + Log("Issuing ping after reconnected"); sub.Ping(); - Assert.Equal(1, muxer.GetSubscriptionsCount()); + + var muxerSubCount = muxer.GetSubscriptionsCount(); + Log($"Muxer thinks we have {muxerSubCount} subscriber(s)."); + Assert.Equal(1, muxerSubCount); + + var muxerSubs = muxer.GetSubscriptions(); + foreach (var pair in muxerSubs) + { + var muxerSub = pair.Value; + Log($" Muxer Sub: {pair.Key}: (EndPoint: {muxerSub.GetCurrentServer()}, Connected: {muxerSub.IsConnected})"); + } Log("Publishing"); var published = await sub.PublishAsync(channel, "abc").ConfigureAwait(false); From 814b4016e453e329574b243a3ed01c0690d1993d Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 20:06:32 -0500 Subject: [PATCH 087/117] More PubSub logging Give the sub *receipt* just a moment to execute under load as well - we can be properly subscribe with the handler not instantly executing on the receiving side. --- tests/StackExchange.Redis.Tests/PubSub.cs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 817c130a1..d6a436fef 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -60,7 +60,7 @@ public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string br var pub = GetAnyMaster(muxer); var sub = muxer.GetSubscriber(); await PingAsync(muxer, pub, sub).ForAwait(); - HashSet received = new HashSet(); + HashSet received = new(); int secondHandler = 0; string subChannel = (wildCard ? 
"a*c" : "abc") + breaker; string pubChannel = "abc" + breaker; @@ -106,7 +106,12 @@ public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string br { Assert.Single(received); } - Assert.Equal(2, Thread.VolatileRead(ref secondHandler)); + + await UntilCondition(TimeSpan.FromSeconds(2), () => Thread.VolatileRead(ref secondHandler) == 2); + + var secondHandlerCount = Thread.VolatileRead(ref secondHandler); + Log("Expecting 2 from second handler, got: " + secondHandlerCount); + Assert.Equal(2, secondHandlerCount); Assert.Equal(1, count); // unsubscribe from second; should see nothing this time @@ -117,7 +122,9 @@ public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string br { Assert.Single(received); } - Assert.Equal(2, Thread.VolatileRead(ref secondHandler)); + secondHandlerCount = Thread.VolatileRead(ref secondHandler); + Log("Expecting 2 from second handler, got: " + secondHandlerCount); + Assert.Equal(2, secondHandlerCount); Assert.Equal(0, count); } } From 667aa34c5321074ea40968736dc01c1d95755aeb Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 20:17:44 -0500 Subject: [PATCH 088/117] TestPatternPubSub: give reception a moment Again, async handler may take a second to get there under load. 
--- tests/StackExchange.Redis.Tests/PubSub.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index d6a436fef..1831ec2e6 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -239,6 +239,9 @@ public async Task TestPatternPubSub() { Assert.Single(received); } + + // Give reception a bit, the handler could be delayed under load + await UntilCondition(TimeSpan.FromSeconds(2), () => Thread.VolatileRead(ref secondHandler) == 1); Assert.Equal(1, Thread.VolatileRead(ref secondHandler)); sub.Unsubscribe("a*c"); From cf96bbaa2c7d5e6a58fca304cb5fafc6c6bcb3eb Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 20:40:50 -0500 Subject: [PATCH 089/117] Moar! --- tests/StackExchange.Redis.Tests/PubSub.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 1831ec2e6..157585c09 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -96,6 +96,8 @@ public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string br { Assert.Single(received); } + // Give handler firing a moment + await UntilCondition(TimeSpan.FromSeconds(2), () => Thread.VolatileRead(ref secondHandler) == 1); Assert.Equal(1, Thread.VolatileRead(ref secondHandler)); // unsubscribe from first; should still see second From a6be64a5714447cbe3c71e61e87be0d7ae99aba5 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 20:51:40 -0500 Subject: [PATCH 090/117] Add logging to Issue1101 pipe --- tests/StackExchange.Redis.Tests/Issues/Issue1101.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/Issues/Issue1101.cs b/tests/StackExchange.Redis.Tests/Issues/Issue1101.cs index 5812dfccc..bb471b744 100644 --- 
a/tests/StackExchange.Redis.Tests/Issues/Issue1101.cs +++ b/tests/StackExchange.Redis.Tests/Issues/Issue1101.cs @@ -23,7 +23,7 @@ private static void AssertCounts(ISubscriber pubsub, in RedisChannel channel, [Fact] public async Task ExecuteWithUnsubscribeViaChannel() { - using (var muxer = Create()) + using (var muxer = Create(log: Writer)) { RedisChannel name = Me(); var pubsub = muxer.GetSubscriber(); @@ -89,7 +89,7 @@ public async Task ExecuteWithUnsubscribeViaChannel() [Fact] public async Task ExecuteWithUnsubscribeViaSubscriber() { - using (var muxer = Create()) + using (var muxer = Create(log: Writer)) { RedisChannel name = Me(); var pubsub = muxer.GetSubscriber(); @@ -141,7 +141,7 @@ public async Task ExecuteWithUnsubscribeViaSubscriber() [Fact] public async Task ExecuteWithUnsubscribeViaClearAll() { - using (var muxer = Create()) + using (var muxer = Create(log: Writer)) { RedisChannel name = Me(); var pubsub = muxer.GetSubscriber(); From 31b38dcd8cb0efbb05f182597958ac2a51302f7a Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 21:03:41 -0500 Subject: [PATCH 091/117] PubSubGetAllAnyOrder: don't share conn --- tests/StackExchange.Redis.Tests/PubSub.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 157585c09..43e0fcecb 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -319,7 +319,7 @@ private void TestMassivePublish(ISubscriber conn, string channel, string caption [Fact] public async Task PubSubGetAllAnyOrder() { - using (var muxer = Create(syncTimeout: 20000)) + using (var muxer = Create(syncTimeout: 20000, shared: false)) { var sub = muxer.GetSubscriber(); RedisChannel channel = Me(); From 79e50901ebe0eef4019063deba0c662e480659b7 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 21:05:54 -0500 Subject: [PATCH 092/117] TextWriterOutputHelper: fix end-of-tests 
race case This is purely handler logging after we care - abort out. --- .../Helpers/TextWriterOutputHelper.cs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/Helpers/TextWriterOutputHelper.cs b/tests/StackExchange.Redis.Tests/Helpers/TextWriterOutputHelper.cs index b0c27f6fa..bdcb7b55f 100644 --- a/tests/StackExchange.Redis.Tests/Helpers/TextWriterOutputHelper.cs +++ b/tests/StackExchange.Redis.Tests/Helpers/TextWriterOutputHelper.cs @@ -82,7 +82,14 @@ protected override void Dispose(bool disposing) private void FlushBuffer() { var text = Buffer.ToString(); - Output.WriteLine(text); + try + { + Output.WriteLine(text); + } + catch (InvalidOperationException) + { + // Thrown when writing from a handler after a test has ended - just bail in this case + } Echo?.AppendLine(text); if (ToConsole) { From 8ea3b6d1256bb2ebc322aafb7add5b149e4ce1c6 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 21:47:44 -0500 Subject: [PATCH 093/117] Bump Docker images to 6.2.6 GitHub is skipping > 5.x, lights up all tests properly. --- tests/RedisConfigs/Dockerfile | 2 +- tests/RedisConfigs/docker-compose.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/RedisConfigs/Dockerfile b/tests/RedisConfigs/Dockerfile index 969497b7d..047da2975 100644 --- a/tests/RedisConfigs/Dockerfile +++ b/tests/RedisConfigs/Dockerfile @@ -1,4 +1,4 @@ -FROM redis:5 +FROM redis:6.2.6 COPY Basic /data/Basic/ COPY Failover /data/Failover/ diff --git a/tests/RedisConfigs/docker-compose.yml b/tests/RedisConfigs/docker-compose.yml index 6475e6664..cb3dd099c 100644 --- a/tests/RedisConfigs/docker-compose.yml +++ b/tests/RedisConfigs/docker-compose.yml @@ -1,4 +1,4 @@ -version: '2.5' +version: '2.6' services: redis: From d9caedbd91a37fb807cb61b16382dfd904e8cea0 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 22:12:40 -0500 Subject: [PATCH 094/117] Nick, you idiot. 
--- tests/StackExchange.Redis.Tests/Locking.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/Locking.cs b/tests/StackExchange.Redis.Tests/Locking.cs index 9053d2956..69a86ac4d 100644 --- a/tests/StackExchange.Redis.Tests/Locking.cs +++ b/tests/StackExchange.Redis.Tests/Locking.cs @@ -99,7 +99,7 @@ private void TestLockOpCountByVersion(IConnectionMultiplexer conn, int expectedO Assert.Equal(!existFirst, taken); Assert.Equal(expectedVal, valAfter); - Assert.True(expectedOps >= countAfter - countBefore, $"{expectedOps} >= ({countAfter} - {countBefore})"); + Assert.True(expectedOps <= countAfter - countBefore, $"{expectedOps} >= ({countAfter} - {countBefore})"); // note we get a ping from GetCounters } From 3fa77a4736018ae56c22f356f30954d762cdfe9c Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 22:32:37 -0500 Subject: [PATCH 095/117] Fix log message too --- tests/StackExchange.Redis.Tests/Locking.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/Locking.cs b/tests/StackExchange.Redis.Tests/Locking.cs index 69a86ac4d..760b1b65f 100644 --- a/tests/StackExchange.Redis.Tests/Locking.cs +++ b/tests/StackExchange.Redis.Tests/Locking.cs @@ -99,8 +99,8 @@ private void TestLockOpCountByVersion(IConnectionMultiplexer conn, int expectedO Assert.Equal(!existFirst, taken); Assert.Equal(expectedVal, valAfter); - Assert.True(expectedOps <= countAfter - countBefore, $"{expectedOps} >= ({countAfter} - {countBefore})"); // note we get a ping from GetCounters + Assert.True(countAfter - countBefore >= expectedOps, $"({countAfter} - {countBefore}) >= {expectedOps}"); } private IConnectionMultiplexer Create(TestMode mode) => mode switch From 04233fe0f82a5ba0018833dcd87a60607c3fef5b Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 22:34:36 -0500 Subject: [PATCH 096/117] Sentinel: Fire and Forget on startup This lets us queue the subscription up on the 
subscriptions collection before it can succeed, but we won't eat the failure in connect from a race either. --- src/StackExchange.Redis/ConnectionMultiplexer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index 2254cf50a..2e3fe9541 100644 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -2321,7 +2321,7 @@ internal void InitializeSentinel(LogProxy logProxy) } } } - }); + }, CommandFlags.FireAndForget); } // If we lose connection to a sentinel server, From ff5401253665276e0fbee8e4aae721f02d6c2175 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sat, 22 Jan 2022 22:39:57 -0500 Subject: [PATCH 097/117] ExplicitPublishMode: remove delay --- tests/StackExchange.Redis.Tests/PubSub.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 43e0fcecb..b3d644ded 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -29,7 +29,6 @@ public async Task ExplicitPublishMode() pub.Subscribe(new RedisChannel("ab*d", RedisChannel.PatternMode.Auto), (x, y) => Interlocked.Increment(ref c)); pub.Subscribe("abc*", (x, y) => Interlocked.Increment(ref d)); - await Task.Delay(1000).ForAwait(); pub.Publish("abcd", "efg"); await UntilCondition(TimeSpan.FromSeconds(10), () => Thread.VolatileRead(ref b) == 1 From 84439d4f6120ef7ce1d64a447dcf124288ea0a2b Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sun, 23 Jan 2022 12:55:34 -0500 Subject: [PATCH 098/117] Cleanup and comments! 
--- src/StackExchange.Redis/RedisSubscriber.cs | 116 +++++++++++++++------ src/StackExchange.Redis/ResultProcessor.cs | 2 +- 2 files changed, 85 insertions(+), 33 deletions(-) diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs index 152a0a492..ac55030d2 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Concurrent; +using System.Diagnostics.CodeAnalysis; using System.Net; using System.Threading; using System.Threading.Tasks; @@ -39,6 +40,10 @@ internal bool TryRemoveSubscription(in RedisChannel channel, out Subscription su } } + /// + /// Gets the subscriber counts for a channel. + /// + /// True if there's a subscription registered at all. internal bool GetSubscriberCounts(in RedisChannel channel, out int handlers, out int queues) { if (subscriptions.TryGetValue(channel, out var sub)) @@ -50,6 +55,13 @@ internal bool GetSubscriberCounts(in RedisChannel channel, out int handlers, out return false; } + /// + /// Gets which server, if any, there's a registered subscription to for this channel. + /// + /// + /// This may be null if there is a subscription, but we don't have a connected server at the moment. + /// This behavior is fine but IsConnected checks, but is a subtle difference in . + /// internal ServerEndPoint GetSubscribedServer(in RedisChannel channel) { if (!channel.IsNullOrEmpty && subscriptions.TryGetValue(channel, out Subscription sub)) @@ -59,6 +71,9 @@ internal ServerEndPoint GetSubscribedServer(in RedisChannel channel) return null; } + /// + /// Handler that executes whenever a message comes in, this doles out messages to any registered handlers. 
+ /// internal void OnMessage(in RedisChannel subscription, in RedisChannel channel, in RedisValue payload) { ICompletable completable = null; @@ -77,24 +92,37 @@ internal void OnMessage(in RedisChannel subscription, in RedisChannel channel, i } } - internal void EnsureSubscriptions(CommandFlags flags = CommandFlags.None) + /// + /// Updates all subscriptions re-evaluating their state. + /// This clears the current server if it's not connected, prepping them to reconnect. + /// + internal void UpdateSubscriptions() { foreach (var pair in subscriptions) { - DefaultSubscriber.EnsureSubscribedToServer(pair.Value, pair.Key, flags, true); + pair.Value.UpdateServer(); } } - internal void UpdateSubscriptions() + /// + /// Ensures all subscriptions are connected to a server, if possible. + /// + internal void EnsureSubscriptions(CommandFlags flags = CommandFlags.None) { + // TODO: Subscribe with variadic commands to reduce round trips foreach (var pair in subscriptions) { - pair.Value.UpdateServer(); + DefaultSubscriber.EnsureSubscribedToServer(pair.Value, pair.Key, flags, true); } } + /// + /// Ensures all subscriptions are connected to a server, if possible. + /// internal async Task EnsureSubscriptionsAsync(CommandFlags flags = CommandFlags.None) { + // TODO: Evaluate performance here, this isn't good for a large number of subscriptions. + // It's probable we want to fire and forget `n` here, recording how many are going to try to reconnect? long count = 0; foreach (var pair in subscriptions) { @@ -112,6 +140,11 @@ internal enum SubscriptionAction Unsubscribe } + /// + /// This is the record of a single subscription to a redis server. + /// It's the singular channel (which may or may not be a pattern), to one or more handlers. + /// We subscriber to a redis server once (for all messages) and execute 1-many handlers when a message arrives. 
+ /// internal sealed class Subscription { private Action _handlers; @@ -120,6 +153,10 @@ internal sealed class Subscription public CommandFlags Flags { get; } public ResultProcessor.TrackSubscriptionsProcessor Processor { get; } + /// + /// Whether the we have is connected. + /// Since we clear on a disconnect, this should stay correct. + /// internal bool IsConnected => CurrentServer?.IsSubscriberConnected == true; public Subscription(CommandFlags flags) @@ -128,6 +165,9 @@ public Subscription(CommandFlags flags) Processor = new ResultProcessor.TrackSubscriptionsProcessor(this); } + /// + /// Gets the configured (P)SUBSCRIBE or (P)UNSUBSCRIBE for an action. + /// internal Message GetMessage(RedisChannel channel, SubscriptionAction action, CommandFlags flags, bool internalCall) { var isPattern = channel.IsPatternBased; @@ -151,8 +191,6 @@ internal Message GetMessage(RedisChannel channel, SubscriptionAction action, Com return msg; } - internal void SetServer(ServerEndPoint server) => CurrentServer = server; - public void Add(Action handler, ChannelMessageQueue queue) { if (handler != null) @@ -165,6 +203,19 @@ public void Add(Action handler, ChannelMessageQueue qu } } + public bool Remove(Action handler, ChannelMessageQueue queue) + { + if (handler != null) + { + _handlers -= handler; + } + if (queue != null) + { + ChannelMessageQueue.Remove(ref _queues, queue); + } + return _handlers == null & _queues == null; + } + public ICompletable ForInvoke(in RedisChannel channel, in RedisValue message, out ChannelMessageQueue queues) { var handlers = _handlers; @@ -178,18 +229,6 @@ internal void MarkCompleted() ChannelMessageQueue.MarkAllCompleted(ref _queues); } - public bool Remove(Action handler, ChannelMessageQueue queue) - { - if (handler != null) - { - _handlers -= handler; - } - if (queue != null) - { - ChannelMessageQueue.Remove(ref _queues, queue); - } - return _handlers == null & _queues == null; - } internal void GetSubscriberCounts(out int handlers, out int 
queues) { queues = ChannelMessageQueue.Count(ref _queues); @@ -210,17 +249,28 @@ internal void GetSubscriberCounts(out int handlers, out int queues) } internal ServerEndPoint GetCurrentServer() => Volatile.Read(ref CurrentServer); + internal void SetCurrentServer(ServerEndPoint server) => CurrentServer = server; + /// + /// Evaluates state and if we're not currently connected, clears the server reference. + /// internal void UpdateServer() { if (!IsConnected) { - SetServer(null); + CurrentServer = null; } } } } + /// + /// A wrapper for subscription actions. + /// + /// + /// By having most functionality here and state on , we can + /// use the baseline execution methods to take the normal message paths. + /// internal sealed class RedisSubscriber : RedisBase, ISubscriber { internal RedisSubscriber(ConnectionMultiplexer multiplexer, object asyncState) : base(multiplexer, asyncState) @@ -241,6 +291,10 @@ public Task IdentifyEndpointAsync(RedisChannel channel, CommandFlags f return ExecuteAsync(msg, ResultProcessor.ConnectionIdentity); } + /// + /// This is *could* we be connected, as in "what's the theoretical endpoint for this channel?", + /// rather than if we're actually connected and actually listening on that channel. + /// public bool IsConnected(RedisChannel channel = default(RedisChannel)) { var server = multiplexer.GetSubscribedServer(channel) ?? multiplexer.SelectServer(RedisCommand.SUBSCRIBE, CommandFlags.DemandMaster, channel); @@ -332,7 +386,7 @@ internal bool EnsureSubscribedToServer(Subscription sub, RedisChannel channel, C // TODO: Cleanup old hangers here? 
- sub.SetServer(null); // we're not appropriately connected, so blank it out for eligible reconnection + sub.SetCurrentServer(null); // we're not appropriately connected, so blank it out for eligible reconnection var message = sub.GetMessage(channel, SubscriptionAction.Subscribe, flags, internalCall); var selected = multiplexer.SelectServer(message); return multiplexer.ExecuteSyncImpl(message, sub.Processor, selected); @@ -364,7 +418,7 @@ public Task EnsureSubscribedToServerAsync(Subscription sub, RedisChannel c // TODO: Cleanup old hangers here? - sub.SetServer(null); // we're not appropriately connected, so blank it out for eligible reconnection + sub.SetCurrentServer(null); // we're not appropriately connected, so blank it out for eligible reconnection var message = sub.GetMessage(channel, SubscriptionAction.Subscribe, flags, internalCall); var selected = multiplexer.SelectServer(message); return ExecuteAsync(message, sub.Processor, selected); @@ -375,6 +429,7 @@ public Task EnsureSubscribedToServerAsync(Subscription sub, RedisChannel c void ISubscriber.Unsubscribe(RedisChannel channel, Action handler, CommandFlags flags) => Unsubscribe(channel, handler, null, flags); + [SuppressMessage("Style", "IDE0075:Simplify conditional expression", Justification = "The suggestion sucks.")] public bool Unsubscribe(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) { ThrowIfNull(channel); @@ -423,19 +478,16 @@ private bool UnregisterSubscription(in RedisChannel channel, Action connection.BridgeCouldBeNull?.ServerEndPoint, _ => null }; - Subscription?.SetServer(newServer); + Subscription?.SetCurrentServer(newServer); return true; } } From 60d9b8002764dd22ed15b0ff3d9f87782a13f56d Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sun, 23 Jan 2022 13:08:15 -0500 Subject: [PATCH 099/117] More comments! 
--- src/StackExchange.Redis/RedisSubscriber.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/StackExchange.Redis/RedisSubscriber.cs b/src/StackExchange.Redis/RedisSubscriber.cs index ac55030d2..28ff75c98 100644 --- a/src/StackExchange.Redis/RedisSubscriber.cs +++ b/src/StackExchange.Redis/RedisSubscriber.cs @@ -433,6 +433,7 @@ void ISubscriber.Unsubscribe(RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) { ThrowIfNull(channel); + // Unregister the subscription handler/queue, and if that returns true (last handler removed), also disconnect from the server return UnregisterSubscription(channel, handler, queue, out var sub) ? UnsubscribeFromServer(sub, channel, flags, false) : true; @@ -454,6 +455,7 @@ Task ISubscriber.UnsubscribeAsync(RedisChannel channel, Action UnsubscribeAsync(in RedisChannel channel, Action handler, ChannelMessageQueue queue, CommandFlags flags) { ThrowIfNull(channel); + // Unregister the subscription handler/queue, and if that returns true (last handler removed), also disconnect from the server return UnregisterSubscription(channel, handler, queue, out var sub) ? 
UnsubscribeFromServerAsync(sub, channel, flags, asyncState, false) : CompletedTask.Default(asyncState); From c6fb5698116ccd77d719bd999336a927f8cc246c Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sun, 23 Jan 2022 13:34:46 -0500 Subject: [PATCH 100/117] Annnnnd the other Sentinel one --- src/StackExchange.Redis/ConnectionMultiplexer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index 2e3fe9541..d6f89c35a 100644 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -2340,7 +2340,7 @@ internal void InitializeSentinel(LogProxy logProxy) { string[] messageParts = ((string)message).Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries); UpdateSentinelAddressList(messageParts[0]); - }); + }, CommandFlags.FireAndForget); } } From c0206fac530f395059d9c6f0907b8ad7f09b8332 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sun, 23 Jan 2022 23:17:10 -0500 Subject: [PATCH 101/117] Start PubSubMultiserver --- .../SimulatedFailureType.cs | 5 ++ tests/StackExchange.Redis.Tests/Cluster.cs | 10 +-- .../Helpers/TestConfig.cs | 2 + .../PubSubMultiserver.cs | 78 +++++++++++++++++++ 4 files changed, 86 insertions(+), 9 deletions(-) create mode 100644 tests/StackExchange.Redis.Tests/PubSubMultiserver.cs diff --git a/src/StackExchange.Redis/SimulatedFailureType.cs b/src/StackExchange.Redis/SimulatedFailureType.cs index 0084746a7..80fca095c 100644 --- a/src/StackExchange.Redis/SimulatedFailureType.cs +++ b/src/StackExchange.Redis/SimulatedFailureType.cs @@ -10,8 +10,13 @@ internal enum SimulatedFailureType InteractiveOutbound = 1 << 1, SubscriptionInbound = 1 << 2, SubscriptionOutbound = 1 << 3, + AllInbound = InteractiveInbound | SubscriptionInbound, AllOutbound = InteractiveOutbound | SubscriptionOutbound, + + AllInteractive = InteractiveInbound | InteractiveOutbound, + AllSubscription = 
SubscriptionInbound | SubscriptionOutbound, + All = AllInbound | AllOutbound, } } diff --git a/tests/StackExchange.Redis.Tests/Cluster.cs b/tests/StackExchange.Redis.Tests/Cluster.cs index a7af400da..6e6e6e7c3 100644 --- a/tests/StackExchange.Redis.Tests/Cluster.cs +++ b/tests/StackExchange.Redis.Tests/Cluster.cs @@ -3,7 +3,6 @@ using System.IO; using System.Linq; using System.Net; -using System.Text; using System.Threading; using System.Threading.Tasks; using StackExchange.Redis.Profiling; @@ -15,14 +14,7 @@ namespace StackExchange.Redis.Tests public class Cluster : TestBase { public Cluster(ITestOutputHelper output) : base (output) { } - - protected override string GetConfiguration() - { - var server = TestConfig.Current.ClusterServer; - return string.Join(",", - Enumerable.Range(TestConfig.Current.ClusterStartPort, TestConfig.Current.ClusterServerCount).Select(port => server + ":" + port) - ) + ",connectTimeout=10000"; - } + protected override string GetConfiguration() => TestConfig.Current.ClusterServersAndPorts + ",connectTimeout=10000"; [Fact] public void ExportConfiguration() diff --git a/tests/StackExchange.Redis.Tests/Helpers/TestConfig.cs b/tests/StackExchange.Redis.Tests/Helpers/TestConfig.cs index 07cbc6e58..a11afa18c 100644 --- a/tests/StackExchange.Redis.Tests/Helpers/TestConfig.cs +++ b/tests/StackExchange.Redis.Tests/Helpers/TestConfig.cs @@ -2,6 +2,7 @@ using System; using Newtonsoft.Json; using System.Threading; +using System.Linq; namespace StackExchange.Redis.Tests { @@ -87,6 +88,7 @@ public class Config public string ClusterServer { get; set; } = "127.0.0.1"; public int ClusterStartPort { get; set; } = 7000; public int ClusterServerCount { get; set; } = 6; + public string ClusterServersAndPorts => string.Join(",", Enumerable.Range(ClusterStartPort, ClusterServerCount).Select(port => ClusterServer + ":" + port)); public string SslServer { get; set; } public int SslPort { get; set; } diff --git 
a/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs b/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs new file mode 100644 index 000000000..2b1a5304c --- /dev/null +++ b/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs @@ -0,0 +1,78 @@ +using System; +using System.Threading.Tasks; +using Xunit; +using Xunit.Abstractions; + +namespace StackExchange.Redis.Tests +{ + [Collection(SharedConnectionFixture.Key)] + public class PubSubMultiserver : TestBase + { + public PubSubMultiserver(ITestOutputHelper output, SharedConnectionFixture fixture) : base(output, fixture) { } + protected override string GetConfiguration() => TestConfig.Current.ClusterServersAndPorts + ",connectTimeout=10000"; + + [Fact] + public void ChannelSharding() + { + using var muxer = Create(channelPrefix: Me()) as ConnectionMultiplexer; + + var defaultSlot = muxer.ServerSelectionStrategy.HashSlot(default(RedisChannel)); + var slot1 = muxer.ServerSelectionStrategy.HashSlot((RedisChannel)"hey"); + var slot2 = muxer.ServerSelectionStrategy.HashSlot((RedisChannel)"hey2"); + + Assert.NotEqual(defaultSlot, slot1); + Assert.NotEqual(ServerSelectionStrategy.NoSlot, slot1); + Assert.NotEqual(slot1, slot2); + } + + [Fact] + public async Task SubscriptionNodeReconnecting() + { + Log("Connecting..."); + using var muxer = Create(allowAdmin: true) as ConnectionMultiplexer; + var sub = muxer.GetSubscriber(); + var channel = (RedisChannel)Me(); + + Log("Subscribing..."); + await sub.SubscribeAsync(channel, (channel, val) => Log("Message: " + val)); + + Assert.True(sub.IsConnected(channel)); + + var endpoint = sub.SubscribedEndpoint(channel); + var subscribedServer = muxer.GetServer(endpoint); + var subscribedServerEndpoint = muxer.GetServerEndPoint(endpoint); + + Assert.True(subscribedServer.IsConnected, "subscribedServer.IsConnected"); + Assert.True(subscribedServerEndpoint.IsConnected, "subscribedServerEndpoint.IsConnected"); + Assert.True(subscribedServerEndpoint.IsSubscriberConnected, 
"subscribedServerEndpoint.IsSubscriberConnected"); + + Assert.True(muxer.TryGetSubscription(channel, out var subscription)); + var initialServer = subscription.GetCurrentServer(); + Assert.NotNull(initialServer); + Assert.True(initialServer.IsConnected); + Log($"Connected to: " + initialServer); + + muxer.AllowConnect = false; + subscribedServerEndpoint.SimulateConnectionFailure(SimulatedFailureType.AllSubscription); + + Assert.True(subscribedServerEndpoint.IsConnected, "subscribedServerEndpoint.IsConnected"); + Assert.False(subscribedServerEndpoint.IsSubscriberConnected, "subscribedServerEndpoint.IsSubscriberConnected"); + + await UntilCondition(TimeSpan.FromSeconds(5), () => subscription.IsConnected); + Assert.True(subscription.IsConnected); + + var newServer = subscription.GetCurrentServer(); + Assert.NotNull(newServer); + Assert.NotEqual(newServer, initialServer); + Log($"Now connected to: " + initialServer); + } + + // 04:14:23.7955: Connection failed(InternalFailure): 127.0.0.1:7002/Subscription: StackExchange.Redis.RedisConnectionException: InternalFailure on 127.0.0.1:7002/Subscription, Initializing/NotStarted, last: SUBSCRIBE, origin: ConnectedAsync, outstanding: 0, last-read: 0s ago, last-write: 0s ago, keep-alive: 60s, state: Connecting, mgr: 9 of 10 available, last-heartbeat: never, last-mbeat: 0s ago, global: 23s ago, v: 2.5.49.64454 ---> StackExchange.Redis.RedisConnectionException: debugging + //at StackExchange.Redis.PhysicalConnection.OnDebugAbort() in C:\git\StackExchange\StackExchange.Redis\src\StackExchange.Redis\PhysicalConnection.cs:line 1560 + // at StackExchange.Redis.PhysicalConnection.d__104.MoveNext() in C:\git\StackExchange\StackExchange.Redis\src\StackExchange.Redis\PhysicalConnection.cs:line 1389 + // --- End of inner exception stack trace --- + + // TODO: Primary/Replica failover + // TODO: Subscribe failover, but with CommandFlags + } +} From b484e5f094a2a70239181221adb3ae92955507c1 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: 
Mon, 24 Jan 2022 08:34:13 -0500 Subject: [PATCH 102/117] Tests, yay! --- tests/StackExchange.Redis.Tests/PubSub.cs | 2 +- .../PubSubMultiserver.cs | 36 ++++++++++++++----- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index b3d644ded..f84efe8e9 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -54,7 +54,7 @@ await UntilCondition(TimeSpan.FromSeconds(10), [InlineData("Foo:", true, "f")] public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string breaker) { - using (var muxer = Create(channelPrefix: channelPrefix, log: Writer)) + using (var muxer = Create(channelPrefix: channelPrefix, shared: false, log: Writer)) { var pub = GetAnyMaster(muxer); var sub = muxer.GetSubscriber(); diff --git a/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs b/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs index 2b1a5304c..85ce6b2a4 100644 --- a/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs +++ b/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs @@ -1,4 +1,5 @@ using System; +using System.Threading; using System.Threading.Tasks; using Xunit; using Xunit.Abstractions; @@ -33,11 +34,24 @@ public async Task SubscriptionNodeReconnecting() var sub = muxer.GetSubscriber(); var channel = (RedisChannel)Me(); + var count = 0; Log("Subscribing..."); - await sub.SubscribeAsync(channel, (channel, val) => Log("Message: " + val)); - + await sub.SubscribeAsync(channel, (channel, val) => + { + Interlocked.Increment(ref count); + Log("Message: " + val); + }); Assert.True(sub.IsConnected(channel)); + Log("Publishing (1)..."); + Assert.Equal(0, count); + var publishedTo = await sub.PublishAsync(channel, "message1"); + // Client -> Redis -> Client -> handler takes just a moment + await UntilCondition(TimeSpan.FromSeconds(2), () => Volatile.Read(ref count) == 1); + Assert.Equal(1, count); + Log($" Published (1) to 
{publishedTo} subscriber(s)."); + Assert.Equal(1, publishedTo); + var endpoint = sub.SubscribedEndpoint(channel); var subscribedServer = muxer.GetServer(endpoint); var subscribedServerEndpoint = muxer.GetServerEndPoint(endpoint); @@ -64,14 +78,20 @@ public async Task SubscriptionNodeReconnecting() var newServer = subscription.GetCurrentServer(); Assert.NotNull(newServer); Assert.NotEqual(newServer, initialServer); - Log($"Now connected to: " + initialServer); + Log($"Now connected to: " + newServer); + + count = 0; + Log("Publishing (2)..."); + Assert.Equal(0, count); + publishedTo = await sub.PublishAsync(channel, "message2"); + // Client -> Redis -> Client -> handler takes just a moment + await UntilCondition(TimeSpan.FromSeconds(2), () => Volatile.Read(ref count) == 1); + Assert.Equal(1, count); + Log($" Published (2) to {publishedTo} subscriber(s)."); + + ClearAmbientFailures(); } - // 04:14:23.7955: Connection failed(InternalFailure): 127.0.0.1:7002/Subscription: StackExchange.Redis.RedisConnectionException: InternalFailure on 127.0.0.1:7002/Subscription, Initializing/NotStarted, last: SUBSCRIBE, origin: ConnectedAsync, outstanding: 0, last-read: 0s ago, last-write: 0s ago, keep-alive: 60s, state: Connecting, mgr: 9 of 10 available, last-heartbeat: never, last-mbeat: 0s ago, global: 23s ago, v: 2.5.49.64454 ---> StackExchange.Redis.RedisConnectionException: debugging - //at StackExchange.Redis.PhysicalConnection.OnDebugAbort() in C:\git\StackExchange\StackExchange.Redis\src\StackExchange.Redis\PhysicalConnection.cs:line 1560 - // at StackExchange.Redis.PhysicalConnection.d__104.MoveNext() in C:\git\StackExchange\StackExchange.Redis\src\StackExchange.Redis\PhysicalConnection.cs:line 1389 - // --- End of inner exception stack trace --- - // TODO: Primary/Replica failover // TODO: Subscribe failover, but with CommandFlags } From 837a654e0e2565edc91040882789998d9130c50f Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 24 Jan 2022 09:39:20 -0500 Subject: 
[PATCH 103/117] Assert up front --- tests/StackExchange.Redis.Tests/PubSub.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index f84efe8e9..46ce97a2a 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -325,6 +325,7 @@ public async Task PubSubGetAllAnyOrder() const int count = 1000; var syncLock = new object(); + Assert.True(sub.IsConnected()); var data = new HashSet(); await sub.SubscribeAsync(channel, (_, val) => { From b0001ab83e0e84574157aaac3b67045317b78ecd Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 24 Jan 2022 09:51:41 -0500 Subject: [PATCH 104/117] PubSub tests: log everything Hunting this immediate-subscribe-no-connection issue. --- tests/StackExchange.Redis.Tests/PubSub.cs | 45 ++++++++++++----------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 46ce97a2a..9c94213fc 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -20,7 +20,7 @@ public PubSub(ITestOutputHelper output, SharedConnectionFixture fixture) : base( [Fact] public async Task ExplicitPublishMode() { - using (var mx = Create(channelPrefix: "foo:")) + using (var mx = Create(channelPrefix: "foo:", log: Writer)) { var pub = mx.GetSubscriber(); int a = 0, b = 0, c = 0, d = 0; @@ -58,7 +58,7 @@ public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string br { var pub = GetAnyMaster(muxer); var sub = muxer.GetSubscriber(); - await PingAsync(muxer, pub, sub).ForAwait(); + await PingAsync(pub, sub).ForAwait(); HashSet received = new(); int secondHandler = 0; string subChannel = (wildCard ? 
"a*c" : "abc") + breaker; @@ -88,7 +88,7 @@ public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string br Assert.Equal(0, Thread.VolatileRead(ref secondHandler)); var count = sub.Publish(pubChannel, "def"); - await PingAsync(muxer, pub, sub, 3).ForAwait(); + await PingAsync(pub, sub, 3).ForAwait(); await UntilCondition(TimeSpan.FromSeconds(5), () => received.Count == 1); lock (received) @@ -102,7 +102,7 @@ public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string br // unsubscribe from first; should still see second sub.Unsubscribe(subChannel, handler1); count = sub.Publish(pubChannel, "ghi"); - await PingAsync(muxer, pub, sub).ForAwait(); + await PingAsync(pub, sub).ForAwait(); lock (received) { Assert.Single(received); @@ -118,7 +118,7 @@ public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string br // unsubscribe from second; should see nothing this time sub.Unsubscribe(subChannel, handler2); count = sub.Publish(pubChannel, "ghi"); - await PingAsync(muxer, pub, sub).ForAwait(); + await PingAsync(pub, sub).ForAwait(); lock (received) { Assert.Single(received); @@ -133,7 +133,7 @@ public async Task TestBasicPubSub(string channelPrefix, bool wildCard, string br [Fact] public async Task TestBasicPubSubFireAndForget() { - using (var muxer = Create(log: Writer, shared: false)) + using (var muxer = Create(shared: false, log: Writer)) { var profiler = muxer.AddProfiler(); var pub = GetAnyMaster(muxer); @@ -142,7 +142,7 @@ public async Task TestBasicPubSubFireAndForget() RedisChannel key = Me() + Guid.NewGuid(); HashSet received = new(); int secondHandler = 0; - await PingAsync(muxer, pub, sub).ForAwait(); + await PingAsync(pub, sub).ForAwait(); sub.Subscribe(key, (channel, payload) => { lock (received) @@ -162,9 +162,9 @@ public async Task TestBasicPubSubFireAndForget() Assert.Empty(received); } Assert.Equal(0, Thread.VolatileRead(ref secondHandler)); - await PingAsync(muxer, pub, sub).ForAwait(); + await 
PingAsync(pub, sub).ForAwait(); var count = sub.Publish(key, "def", CommandFlags.FireAndForget); - await PingAsync(muxer, pub, sub).ForAwait(); + await PingAsync(pub, sub).ForAwait(); await UntilCondition(TimeSpan.FromSeconds(5), () => received.Count == 1); Log(profiler); @@ -178,7 +178,7 @@ public async Task TestBasicPubSubFireAndForget() sub.Unsubscribe(key); count = sub.Publish(key, "ghi", CommandFlags.FireAndForget); - await PingAsync(muxer, pub, sub).ForAwait(); + await PingAsync(pub, sub).ForAwait(); Log(profiler); lock (received) { @@ -188,7 +188,7 @@ public async Task TestBasicPubSubFireAndForget() } } - private async Task PingAsync(IConnectionMultiplexer muxer, IServer pub, ISubscriber sub, int times = 1) + private async Task PingAsync(IServer pub, ISubscriber sub, int times = 1) { while (times-- > 0) { @@ -206,7 +206,7 @@ private async Task PingAsync(IConnectionMultiplexer muxer, IServer pub, ISubscri [Fact] public async Task TestPatternPubSub() { - using (var muxer = Create(shared: false)) + using (var muxer = Create(shared: false, log: Writer)) { var pub = GetAnyMaster(muxer); var sub = muxer.GetSubscriber(); @@ -231,9 +231,9 @@ public async Task TestPatternPubSub() } Assert.Equal(0, Thread.VolatileRead(ref secondHandler)); - await PingAsync(muxer, pub, sub).ForAwait(); + await PingAsync(pub, sub).ForAwait(); var count = sub.Publish("abc", "def"); - await PingAsync(muxer, pub, sub).ForAwait(); + await PingAsync(pub, sub).ForAwait(); await UntilCondition(TimeSpan.FromSeconds(5), () => received.Count == 1); lock (received) @@ -248,7 +248,7 @@ public async Task TestPatternPubSub() sub.Unsubscribe("a*c"); count = sub.Publish("abc", "ghi"); - await PingAsync(muxer, pub, sub).ForAwait(); + await PingAsync(pub, sub).ForAwait(); lock (received) { @@ -318,7 +318,7 @@ private void TestMassivePublish(ISubscriber conn, string channel, string caption [Fact] public async Task PubSubGetAllAnyOrder() { - using (var muxer = Create(syncTimeout: 20000, shared: false)) + 
using (var muxer = Create(syncTimeout: 20000, shared: false, log: Writer)) { var sub = muxer.GetSubscriber(); RedisChannel channel = Me(); @@ -440,7 +440,7 @@ await Assert.ThrowsAsync(async delegate [Fact] public async Task PubSubGetAllCorrectOrder_OnMessage_Sync() { - using (var muxer = Create(configuration: TestConfig.Current.RemoteServerAndPort, syncTimeout: 20000)) + using (var muxer = Create(configuration: TestConfig.Current.RemoteServerAndPort, syncTimeout: 20000, log: Writer)) { var sub = muxer.GetSubscriber(); RedisChannel channel = Me(); @@ -509,7 +509,7 @@ await Assert.ThrowsAsync(async delegate [Fact] public async Task PubSubGetAllCorrectOrder_OnMessage_Async() { - using (var muxer = Create(configuration: TestConfig.Current.RemoteServerAndPort, syncTimeout: 20000)) + using (var muxer = Create(configuration: TestConfig.Current.RemoteServerAndPort, syncTimeout: 20000, log: Writer)) { var sub = muxer.GetSubscriber(); RedisChannel channel = Me(); @@ -604,8 +604,8 @@ public async Task TestPublishWithSubscribers() public async Task TestMultipleSubscribersGetMessage() { var channel = Me(); - using (var muxerA = Create(shared: false)) - using (var muxerB = Create(shared: false)) + using (var muxerA = Create(shared: false, log: Writer)) + using (var muxerB = Create(shared: false, log: Writer)) using (var conn = Create()) { var listenA = muxerA.GetSubscriber(); @@ -635,7 +635,7 @@ public async Task TestMultipleSubscribersGetMessage() public async Task Issue38() { // https://code.google.com/p/booksleeve/issues/detail?id=38 - using (var pub = Create()) + using (var pub = Create(log: Writer)) { var sub = pub.GetSubscriber(); int count = 0; @@ -762,12 +762,13 @@ public async Task AzureRedisEventsAutomaticSubscribe() [Fact] public async Task SubscriptionsSurviveConnectionFailureAsync() { - using (var muxer = Create(allowAdmin: true, shared: false, syncTimeout: 1000) as ConnectionMultiplexer) + using (var muxer = Create(allowAdmin: true, shared: false, log: Writer, 
syncTimeout: 1000) as ConnectionMultiplexer) { var profiler = muxer.AddProfiler(); RedisChannel channel = Me(); var sub = muxer.GetSubscriber(); int counter = 0; + Assert.True(sub.IsConnected()); await sub.SubscribeAsync(channel, delegate { Interlocked.Increment(ref counter); From 1bc971dd6659ac22d7f906f48ab7e9842568e1fe Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 24 Jan 2022 19:37:21 -0500 Subject: [PATCH 105/117] Primary/Replica tests --- .../PubSubMultiserver.cs | 94 ++++++++++++++++++- 1 file changed, 91 insertions(+), 3 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs b/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs index 85ce6b2a4..a0a405fac 100644 --- a/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs +++ b/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs @@ -27,7 +27,7 @@ public void ChannelSharding() } [Fact] - public async Task SubscriptionNodeReconnecting() + public async Task ClusterNodeSubscriptionFailover() { Log("Connecting..."); using var muxer = Create(allowAdmin: true) as ConnectionMultiplexer; @@ -92,7 +92,95 @@ await sub.SubscribeAsync(channel, (channel, val) => ClearAmbientFailures(); } - // TODO: Primary/Replica failover - // TODO: Subscribe failover, but with CommandFlags + [Theory] + [InlineData(CommandFlags.PreferMaster, true)] + [InlineData(CommandFlags.PreferReplica, true)] + [InlineData(CommandFlags.DemandMaster, false)] + [InlineData(CommandFlags.DemandReplica, false)] + public async Task PrimaryReplicaSubscriptionFailover(CommandFlags flags, bool expectSuccess) + { + var config = TestConfig.Current.MasterServerAndPort + "," + TestConfig.Current.ReplicaServerAndPort; + Log("Connecting..."); + using var muxer = Create(configuration: config, allowAdmin: true) as ConnectionMultiplexer; + var sub = muxer.GetSubscriber(); + var channel = (RedisChannel)Me(); + + var count = 0; + Log("Subscribing..."); + await sub.SubscribeAsync(channel, (channel, val) => + { + Interlocked.Increment(ref 
count); + Log("Message: " + val); + }, flags); + Assert.True(sub.IsConnected(channel)); + + Log("Publishing (1)..."); + Assert.Equal(0, count); + var publishedTo = await sub.PublishAsync(channel, "message1"); + // Client -> Redis -> Client -> handler takes just a moment + await UntilCondition(TimeSpan.FromSeconds(2), () => Volatile.Read(ref count) == 1); + Assert.Equal(1, count); + Log($" Published (1) to {publishedTo} subscriber(s)."); + + var endpoint = sub.SubscribedEndpoint(channel); + var subscribedServer = muxer.GetServer(endpoint); + var subscribedServerEndpoint = muxer.GetServerEndPoint(endpoint); + + Assert.True(subscribedServer.IsConnected, "subscribedServer.IsConnected"); + Assert.True(subscribedServerEndpoint.IsConnected, "subscribedServerEndpoint.IsConnected"); + Assert.True(subscribedServerEndpoint.IsSubscriberConnected, "subscribedServerEndpoint.IsSubscriberConnected"); + + Assert.True(muxer.TryGetSubscription(channel, out var subscription)); + var initialServer = subscription.GetCurrentServer(); + Assert.NotNull(initialServer); + Assert.True(initialServer.IsConnected); + Log($"Connected to: " + initialServer); + + muxer.AllowConnect = false; + subscribedServerEndpoint.SimulateConnectionFailure(SimulatedFailureType.AllSubscription); + + Assert.True(subscribedServerEndpoint.IsConnected, "subscribedServerEndpoint.IsConnected"); + Assert.False(subscribedServerEndpoint.IsSubscriberConnected, "subscribedServerEndpoint.IsSubscriberConnected"); + + if (expectSuccess) + { + await UntilCondition(TimeSpan.FromSeconds(5), () => subscription.IsConnected); + Assert.True(subscription.IsConnected); + + var newServer = subscription.GetCurrentServer(); + Assert.NotNull(newServer); + Assert.NotEqual(newServer, initialServer); + Log($"Now connected to: " + newServer); + } + else + { + // This subscription shouldn't be able to reconnect by flags (demanding an unavailable server) + await UntilCondition(TimeSpan.FromSeconds(2), () => subscription.IsConnected); + 
Assert.False(subscription.IsConnected); + Log("Unable to reconnect (as expected)"); + + // Allow connecting back to the original + muxer.AllowConnect = true; + await UntilCondition(TimeSpan.FromSeconds(2), () => subscription.IsConnected); + Assert.True(subscription.IsConnected); + + var newServer = subscription.GetCurrentServer(); + Assert.NotNull(newServer); + Assert.Equal(newServer, initialServer); + Log($"Now connected to: " + newServer); + } + + + count = 0; + Log("Publishing (2)..."); + Assert.Equal(0, count); + publishedTo = await sub.PublishAsync(channel, "message2"); + // Client -> Redis -> Client -> handler takes just a moment + await UntilCondition(TimeSpan.FromSeconds(2), () => Volatile.Read(ref count) == 1); + Assert.Equal(1, count); + Log($" Published (2) to {publishedTo} subscriber(s)."); + + ClearAmbientFailures(); + } } } From 9c31958b6dcca2d2a9b5a8dfca4f9495c160fede Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 24 Jan 2022 20:13:50 -0500 Subject: [PATCH 106/117] Fix PubSub tests: can't share that connection yo These will pile up handlers if we share the multiplexer, doh. 
--- tests/StackExchange.Redis.Tests/PubSubMultiserver.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs b/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs index a0a405fac..1a667970e 100644 --- a/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs +++ b/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs @@ -101,7 +101,7 @@ public async Task PrimaryReplicaSubscriptionFailover(CommandFlags flags, bool ex { var config = TestConfig.Current.MasterServerAndPort + "," + TestConfig.Current.ReplicaServerAndPort; Log("Connecting..."); - using var muxer = Create(configuration: config, allowAdmin: true) as ConnectionMultiplexer; + using var muxer = Create(configuration: config, shared: false, allowAdmin: true) as ConnectionMultiplexer; var sub = muxer.GetSubscriber(); var channel = (RedisChannel)Me(); From eed3ba032fd7c2cd5dd28c204cfc9971c8ded6ba Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 24 Jan 2022 20:33:17 -0500 Subject: [PATCH 107/117] Remove the .NET 5.0 from Windows build too... Speeding up the Windows PR build --- .github/workflows/CI.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 32c118f8c..6d2c0e8d6 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -50,7 +50,6 @@ jobs: with: dotnet-version: | 3.1.x - 5.0.x 6.0.x - name: .NET Build run: dotnet build Build.csproj -c Release /p:CI=true From 2fe8d134c61941fa0d777057af06fd7f04ec3658 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 24 Jan 2022 21:02:19 -0500 Subject: [PATCH 108/117] Sentinel: account for multi-suite failover states We can be in a still-recovering replica state after the first test suite ran - allow it a moment to get back to good when running these. 
--- tests/StackExchange.Redis.Tests/Sentinel.cs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/Sentinel.cs b/tests/StackExchange.Redis.Tests/Sentinel.cs index 332c37877..fbe4e05e9 100644 --- a/tests/StackExchange.Redis.Tests/Sentinel.cs +++ b/tests/StackExchange.Redis.Tests/Sentinel.cs @@ -359,8 +359,11 @@ public async Task SentinelMastersAsyncTest() } [Fact] - public void SentinelReplicasTest() + public async Task SentinelReplicasTest() { + // Give previous test run a moment to reset when multi-framework failover is in play. + await UntilCondition(TimeSpan.FromSeconds(5), () => SentinelServerA.SentinelReplicas(ServiceName).Length > 0); + var replicaConfigs = SentinelServerA.SentinelReplicas(ServiceName); Assert.True(replicaConfigs.Length > 0, "Has replicaConfigs"); Assert.True(replicaConfigs[0].ToDictionary().ContainsKey("name"), "replicaConfigs contains 'name'"); @@ -378,6 +381,9 @@ public void SentinelReplicasTest() [Fact] public async Task SentinelReplicasAsyncTest() { + // Give previous test run a moment to reset when multi-framework failover is in play. 
+ await UntilCondition(TimeSpan.FromSeconds(5), () => SentinelServerA.SentinelReplicas(ServiceName).Length > 0); + var replicaConfigs = await SentinelServerA.SentinelReplicasAsync(ServiceName).ForAwait(); Assert.True(replicaConfigs.Length > 0, "Has replicaConfigs"); Assert.True(replicaConfigs[0].ToDictionary().ContainsKey("name"), "replicaConfigs contains 'name'"); From d23423502bf59b075f99bf7ef4bd267486764126 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 24 Jan 2022 21:57:36 -0500 Subject: [PATCH 109/117] ExecuteWithUnsubscribeViaSubscriber: don't share conn --- tests/StackExchange.Redis.Tests/Issues/Issue1101.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/Issues/Issue1101.cs b/tests/StackExchange.Redis.Tests/Issues/Issue1101.cs index bb471b744..c81a6e004 100644 --- a/tests/StackExchange.Redis.Tests/Issues/Issue1101.cs +++ b/tests/StackExchange.Redis.Tests/Issues/Issue1101.cs @@ -89,7 +89,7 @@ public async Task ExecuteWithUnsubscribeViaChannel() [Fact] public async Task ExecuteWithUnsubscribeViaSubscriber() { - using (var muxer = Create(log: Writer)) + using (var muxer = Create(shared: false, log: Writer)) { RedisChannel name = Me(); var pubsub = muxer.GetSubscriber(); From 33f52af9a0737b6e83ed95f6d0bef50cfde14458 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 24 Jan 2022 22:17:11 -0500 Subject: [PATCH 110/117] Re-disable TestMassivePublishWithWithoutFlush_Local Server flooding, is no good. 
--- tests/StackExchange.Redis.Tests/PubSub.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/PubSub.cs b/tests/StackExchange.Redis.Tests/PubSub.cs index 9c94213fc..2c4d6ef80 100644 --- a/tests/StackExchange.Redis.Tests/PubSub.cs +++ b/tests/StackExchange.Redis.Tests/PubSub.cs @@ -267,7 +267,7 @@ public void TestPublishWithNoSubscribers() } } - [Fact] + [FactLongRunning] public void TestMassivePublishWithWithoutFlush_Local() { using (var muxer = Create()) From f88e8e1bc53d5b26ff5a05fdec4e66748b786355 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Wed, 26 Jan 2022 14:03:34 -0500 Subject: [PATCH 111/117] PR feedback! --- src/StackExchange.Redis/Enums/CommandFlags.cs | 4 +++- src/StackExchange.Redis/Message.cs | 4 ++-- src/StackExchange.Redis/PhysicalConnection.cs | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/StackExchange.Redis/Enums/CommandFlags.cs b/src/StackExchange.Redis/Enums/CommandFlags.cs index 288c43d68..119fe22bb 100644 --- a/src/StackExchange.Redis/Enums/CommandFlags.cs +++ b/src/StackExchange.Redis/Enums/CommandFlags.cs @@ -81,6 +81,8 @@ public enum CommandFlags /// NoScriptCache = 512, - // 1024: Use subscription connection type; never user-specified, so not visible on the public API + // 1024: Removed - was used for async timeout checks; never user-specified, so not visible on the public API + + // 2048: Use subscription connection type; never user-specified, so not visible on the public API } } diff --git a/src/StackExchange.Redis/Message.cs b/src/StackExchange.Redis/Message.cs index 576d9cb31..84bbcd6d0 100644 --- a/src/StackExchange.Redis/Message.cs +++ b/src/StackExchange.Redis/Message.cs @@ -58,7 +58,7 @@ internal abstract class Message : ICompletable private const CommandFlags AskingFlag = (CommandFlags)32, ScriptUnavailableFlag = (CommandFlags)256, - DemandSubscriptionConnection = (CommandFlags)1024; + DemandSubscriptionConnection = (CommandFlags)2048; private const 
CommandFlags MaskMasterServerPreference = CommandFlags.DemandMaster | CommandFlags.DemandReplica @@ -663,7 +663,7 @@ internal void SetWriteTime() /// internal bool HasTimedOut(int now, int timeoutMilliseconds, out int millisecondsTaken) { - millisecondsTaken = unchecked(now - _writeTickCount); // note: we can't just check "if sent < cutoff" because of wrap-aro + millisecondsTaken = unchecked(now - _writeTickCount); // note: we can't just check "if sent < cutoff" because of wrap-around if (millisecondsTaken >= timeoutMilliseconds) { return true; diff --git a/src/StackExchange.Redis/PhysicalConnection.cs b/src/StackExchange.Redis/PhysicalConnection.cs index 3cdb13724..5e7e0da93 100644 --- a/src/StackExchange.Redis/PhysicalConnection.cs +++ b/src/StackExchange.Redis/PhysicalConnection.cs @@ -650,6 +650,7 @@ internal void OnBridgeHeartbeat() foreach (var msg in _writtenAwaitingResponse) { // We only handle async timeouts here, synchronous timeouts are handled upstream. + // Those sync timeouts happen in ConnectionMultiplexer.ExecuteSyncImpl() via Monitor.Wait. if (msg.ResultBoxIsAsync && msg.HasTimedOut(now, timeout, out var elapsed)) { bool haveDeltas = msg.TryGetPhysicalState(out _, out _, out long sentDelta, out var receivedDelta) && sentDelta >= 0 && receivedDelta >= 0; From 21859119536cb2ddcf731ee3c833fd679e4dbc7c Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Wed, 26 Jan 2022 14:16:04 -0500 Subject: [PATCH 112/117] Adjust RedisServer.ExecuteSync/Async for "don't care" cases Good catch by @TimLovellSmith, this indeed should have a more constrained set of code being checked - we can still safely exit on the null and FireAndForget cases, only needing to wrap around the throws. 
--- src/StackExchange.Redis/RedisServer.cs | 29 ++++++++++++++++++-------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/StackExchange.Redis/RedisServer.cs b/src/StackExchange.Redis/RedisServer.cs index b61d337ce..fc60c48f6 100644 --- a/src/StackExchange.Redis/RedisServer.cs +++ b/src/StackExchange.Redis/RedisServer.cs @@ -572,30 +572,41 @@ internal static Message CreateReplicaOfMessage(ServerEndPoint sendMessageTo, End } internal override Task ExecuteAsync(Message message, ResultProcessor processor, ServerEndPoint server = null) - { // inject our expected server automatically + { + // inject our expected server automatically if (server == null) server = this.server; FixFlags(message, server); - if (!server.IsConnected && !multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) + if (!server.IsConnected) { if (message == null) return CompletedTask.Default(asyncState); if (message.IsFireAndForget) return CompletedTask.Default(null); // F+F explicitly does not get async-state - // no need to deny exec-sync here; will be complete before they see if - var tcs = TaskSource.Create(asyncState); - ConnectionMultiplexer.ThrowFailed(tcs, ExceptionFactory.NoConnectionAvailable(multiplexer, message, server)); - return tcs.Task; + // After the "don't care" cases above, if we can't queue then it's time to error - otherwise call through to queueing. 
+ if (!multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) + { + // no need to deny exec-sync here; will be complete before they see if + var tcs = TaskSource.Create(asyncState); + ConnectionMultiplexer.ThrowFailed(tcs, ExceptionFactory.NoConnectionAvailable(multiplexer, message, server)); + return tcs.Task; + } } return base.ExecuteAsync(message, processor, server); } internal override T ExecuteSync(Message message, ResultProcessor processor, ServerEndPoint server = null) - { // inject our expected server automatically + { + // inject our expected server automatically if (server == null) server = this.server; FixFlags(message, server); - if (!server.IsConnected && !multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) + if (!server.IsConnected) { if (message == null || message.IsFireAndForget) return default(T); - throw ExceptionFactory.NoConnectionAvailable(multiplexer, message, server); + + // After the "don't care" cases above, if we can't queue then it's time to error - otherwise call through to queueing. + if (!multiplexer.RawConfig.BacklogPolicy.QueueWhileDisconnected) + { + throw ExceptionFactory.NoConnectionAvailable(multiplexer, message, server); + } } return base.ExecuteSync(message, processor, server); } From 3f694c507686788f5821b44113357968b0e20b5f Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Wed, 26 Jan 2022 14:31:59 -0500 Subject: [PATCH 113/117] Nick you dummy This is in seconds (I now loathe all non-timespan time APIs!), the whole point was for the keepalive to fire and detect failure. 
--- tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs index 194808eb9..606745a08 100644 --- a/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs +++ b/tests/StackExchange.Redis.Tests/ConnectingFailDetection.cs @@ -97,7 +97,7 @@ public async Task Issue922_ReconnectRaised() { var config = ConfigurationOptions.Parse(TestConfig.Current.MasterServerAndPort); config.AbortOnConnectFail = true; - config.KeepAlive = 10; + config.KeepAlive = 1; config.SyncTimeout = 1000; config.AsyncTimeout = 1000; config.ReconnectRetryPolicy = new ExponentialRetry(5000); From 2edf892c4d58d19465e4ba0e9950dffe55a227a8 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Wed, 26 Jan 2022 14:42:08 -0500 Subject: [PATCH 114/117] PubSubMultiserver: separate channels on publishers These could race between test runs - let's not do that. 
--- tests/StackExchange.Redis.Tests/PubSubMultiserver.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs b/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs index 1a667970e..87c55b470 100644 --- a/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs +++ b/tests/StackExchange.Redis.Tests/PubSubMultiserver.cs @@ -103,7 +103,7 @@ public async Task PrimaryReplicaSubscriptionFailover(CommandFlags flags, bool ex Log("Connecting..."); using var muxer = Create(configuration: config, shared: false, allowAdmin: true) as ConnectionMultiplexer; var sub = muxer.GetSubscriber(); - var channel = (RedisChannel)Me(); + var channel = (RedisChannel)(Me() + flags.ToString()); // Individual channel per case to not overlap publishers var count = 0; Log("Subscribing..."); @@ -117,10 +117,10 @@ await sub.SubscribeAsync(channel, (channel, val) => Log("Publishing (1)..."); Assert.Equal(0, count); var publishedTo = await sub.PublishAsync(channel, "message1"); + Log($" Published (1) to {publishedTo} subscriber(s)."); // Client -> Redis -> Client -> handler takes just a moment await UntilCondition(TimeSpan.FromSeconds(2), () => Volatile.Read(ref count) == 1); Assert.Equal(1, count); - Log($" Published (1) to {publishedTo} subscriber(s)."); var endpoint = sub.SubscribedEndpoint(channel); var subscribedServer = muxer.GetServer(endpoint); From b77d0a359b8dcaabf558f8e4c511e5ef6a21db30 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Fri, 4 Feb 2022 10:45:39 -0500 Subject: [PATCH 115/117] CI Tweaks (#1970) Based on #1912 (otherwise tests fail those PRs are working on, but this is good against `main` too) - Shortens the names in the check list - Removes .NET 5.0 SDK we don't need the runtime for anymore - Removes `netcoreapp3.1` from StackExchange.Redis.Tests (running `net472` and `net6.0` now) --- .github/workflows/CI.yml | 28 +++++++++---------- appveyor.yml | 2 -- .../StackExchange.Redis.Tests.csproj | 2 +- 3 files 
changed, 15 insertions(+), 17 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 6d2c0e8d6..44bd09eb6 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,4 +1,4 @@ -name: CI Builds +name: CI on: pull_request: @@ -16,11 +16,10 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v1 - - name: Setup .NET Core + - name: Install .NET SDK uses: actions/setup-dotnet@v1 with: dotnet-version: | - 3.1.x 6.0.x - name: .NET Build run: dotnet build Build.csproj -c Release /p:CI=true @@ -33,24 +32,25 @@ jobs: continue-on-error: true if: success() || failure() with: - name: StackExchange.Redis.Tests (Ubuntu) - Results + name: Test Results - Ubuntu path: 'test-results/*.trx' reporter: dotnet-trx - name: .NET Lib Pack run: dotnet pack src/StackExchange.Redis/StackExchange.Redis.csproj --no-build -c Release /p:Packing=true /p:PackageOutputPath=%CD%\.nupkgs /p:CI=true windows: - name: StackExchange.Redis (Windows Server 2019) - runs-on: windows-2019 + name: StackExchange.Redis (Windows Server 2022) + runs-on: windows-2022 + env: + NUGET_CERT_REVOCATION_MODE: offline # Disabling signing because of massive perf hit, see https://github.com/NuGet/Home/issues/11548 steps: - name: Checkout code uses: actions/checkout@v1 - - name: Setup .NET Core 3.x - uses: actions/setup-dotnet@v1 - with: - dotnet-version: | - 3.1.x - 6.0.x + # - name: Install .NET SDK + # uses: actions/setup-dotnet@v1 + # with: + # dotnet-version: | + # 6.0.x - name: .NET Build run: dotnet build Build.csproj -c Release /p:CI=true - name: Start Redis Services (v3.0.503) @@ -79,6 +79,6 @@ jobs: continue-on-error: true if: success() || failure() with: - name: StackExchange.Redis.Tests (Windows Server 2019) - Results + name: Tests Results - Windows Server 2022 path: 'test-results/*.trx' - reporter: dotnet-trx + reporter: dotnet-trx diff --git a/appveyor.yml b/appveyor.yml index 7387352eb..a2107f48c 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -6,8 +6,6 @@ 
init: install: - cmd: >- - choco install dotnet-sdk --version 5.0.404 - choco install dotnet-sdk --version 6.0.101 cd tests\RedisConfigs\3.0.503 diff --git a/tests/StackExchange.Redis.Tests/StackExchange.Redis.Tests.csproj b/tests/StackExchange.Redis.Tests/StackExchange.Redis.Tests.csproj index 5cf895e50..454e97982 100644 --- a/tests/StackExchange.Redis.Tests/StackExchange.Redis.Tests.csproj +++ b/tests/StackExchange.Redis.Tests/StackExchange.Redis.Tests.csproj @@ -1,6 +1,6 @@  - net472;netcoreapp3.1;net6.0 + net472;net6.0 StackExchange.Redis.Tests true true From 04ce70b7642615cd7a7ae1fcd3a93019fce1aaa5 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Fri, 4 Feb 2022 10:47:37 -0500 Subject: [PATCH 116/117] Seal backlog --- src/StackExchange.Redis/BacklogPolicy.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StackExchange.Redis/BacklogPolicy.cs b/src/StackExchange.Redis/BacklogPolicy.cs index 7666a90de..4fb9e67c7 100644 --- a/src/StackExchange.Redis/BacklogPolicy.cs +++ b/src/StackExchange.Redis/BacklogPolicy.cs @@ -6,7 +6,7 @@ /// or it could choose to fail fast and throw ASAP. Different apps desire different behaviors with backpressure and how to handle /// large amounts of load, so this is configurable to optimize the happy path but avoid spiral-of-death queue scenarios for others. /// - public class BacklogPolicy + public sealed class BacklogPolicy { /// /// Backlog behavior matching StackExchange.Redis's 2.x line, failing fast and not attempting to queue From 2c2b0ea51c406fecd765f851a96acbbce5f06f2d Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sun, 6 Feb 2022 20:19:13 -0500 Subject: [PATCH 117/117] Backlog tests: add QueuesAndFlushesAfterReconnectingClusterAsync Ensures cluster is also cooperating as expected. 
--- .../StackExchange.Redis.Tests/BacklogTests.cs | 106 +++++++++++++++++- 1 file changed, 102 insertions(+), 4 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/BacklogTests.cs b/tests/StackExchange.Redis.Tests/BacklogTests.cs index ec70b0f81..990d15d61 100644 --- a/tests/StackExchange.Redis.Tests/BacklogTests.cs +++ b/tests/StackExchange.Redis.Tests/BacklogTests.cs @@ -12,9 +12,6 @@ public BacklogTests(ITestOutputHelper output) : base (output) { } protected override string GetConfiguration() => TestConfig.Current.MasterServerAndPort + "," + TestConfig.Current.ReplicaServerAndPort; - // TODO: Sync route testing (e.g. Ping() for TryWriteSync path) - // TODO: Specific server calls - [Fact] public async Task FailFast() { @@ -199,7 +196,6 @@ public async Task QueuesAndFlushesAfterReconnectingAsync() } } - [Fact] public async Task QueuesAndFlushesAfterReconnecting() { @@ -300,5 +296,107 @@ void disconnectedPings(int id) ClearAmbientFailures(); } } + + [Fact] + public async Task QueuesAndFlushesAfterReconnectingClusterAsync() + { + try + { + var options = ConfigurationOptions.Parse(TestConfig.Current.ClusterServersAndPorts); + options.BacklogPolicy = BacklogPolicy.Default; + options.AbortOnConnectFail = false; + options.ConnectTimeout = 1000; + options.ConnectRetry = 2; + options.SyncTimeout = 10000; + options.KeepAlive = 10000; + options.AsyncTimeout = 5000; + options.AllowAdmin = true; + options.SocketManager = SocketManager.ThreadPool; + + using var muxer = await ConnectionMultiplexer.ConnectAsync(options, Writer); + muxer.ErrorMessage += (s, e) => Log($"Error Message {e.EndPoint}: {e.Message}"); + muxer.InternalError += (s, e) => Log($"Internal Error {e.EndPoint}: {e.Exception.Message}"); + muxer.ConnectionFailed += (s, a) => Log("Disconnected: " + EndPointCollection.ToString(a.EndPoint)); + muxer.ConnectionRestored += (s, a) => Log("Reconnected: " + EndPointCollection.ToString(a.EndPoint)); + + var db = muxer.GetDatabase(); + Writer.WriteLine("Test: Initial 
(connected) ping"); + await db.PingAsync(); + + RedisKey meKey = Me(); + var getMsg = Message.Create(0, CommandFlags.None, RedisCommand.GET, meKey); + + var server = muxer.SelectServer(getMsg); // Get the server specifically for this message's hash slot + var stats = server.GetBridgeStatus(ConnectionType.Interactive); + Assert.Equal(0, stats.BacklogMessagesPending); // Everything's normal + + static Task PingAsync(ServerEndPoint server, CommandFlags flags = CommandFlags.None) + { + var message = ResultProcessor.TimingProcessor.CreateMessage(-1, flags, RedisCommand.PING); + + server.Multiplexer.CheckMessage(message); + return server.Multiplexer.ExecuteAsyncImpl(message, ResultProcessor.ResponseTimer, null, server); + } + + // Fail the connection + Writer.WriteLine("Test: Simulating failure"); + muxer.AllowConnect = false; + server.SimulateConnectionFailure(SimulatedFailureType.All); + Assert.False(server.IsConnected); // Server isn't connected + Assert.True(muxer.IsConnected); // ...but the multiplexer is + + // Queue up some commands + Writer.WriteLine("Test: Disconnected pings"); + var ignoredA = PingAsync(server); + var ignoredB = PingAsync(server); + var lastPing = PingAsync(server); + + var disconnectedStats = server.GetBridgeStatus(ConnectionType.Interactive); + Assert.False(server.IsConnected); + Assert.True(muxer.IsConnected); + Assert.True(disconnectedStats.BacklogMessagesPending >= 3, $"Expected {nameof(disconnectedStats.BacklogMessagesPending)} > 3, got {disconnectedStats.BacklogMessagesPending}"); + + Writer.WriteLine("Test: Allowing reconnect"); + muxer.AllowConnect = true; + Writer.WriteLine("Test: Awaiting reconnect"); + await UntilCondition(TimeSpan.FromSeconds(3), () => server.IsConnected).ForAwait(); + + Writer.WriteLine("Test: Checking reconnected 1"); + Assert.True(server.IsConnected); + Assert.True(muxer.IsConnected); + + Writer.WriteLine("Test: ignoredA Status: " + ignoredA.Status); + Writer.WriteLine("Test: ignoredB Status: " + 
ignoredB.Status); + Writer.WriteLine("Test: lastPing Status: " + lastPing.Status); + var afterConnectedStats = server.GetBridgeStatus(ConnectionType.Interactive); + Writer.WriteLine($"Test: BacklogStatus: {afterConnectedStats.BacklogStatus}, BacklogMessagesPending: {afterConnectedStats.BacklogMessagesPending}, IsWriterActive: {afterConnectedStats.IsWriterActive}, MessagesSinceLastHeartbeat: {afterConnectedStats.MessagesSinceLastHeartbeat}, TotalBacklogMessagesQueued: {afterConnectedStats.TotalBacklogMessagesQueued}"); + + Writer.WriteLine("Test: Awaiting lastPing 1"); + await lastPing; + + Writer.WriteLine("Test: Checking reconnected 2"); + Assert.True(server.IsConnected); + Assert.True(muxer.IsConnected); + var reconnectedStats = server.GetBridgeStatus(ConnectionType.Interactive); + Assert.Equal(0, reconnectedStats.BacklogMessagesPending); + + Writer.WriteLine("Test: Pinging again..."); + _ = PingAsync(server); + _ = PingAsync(server); + Writer.WriteLine("Test: Last Ping issued"); + lastPing = PingAsync(server); ; + + // We should see none queued + Writer.WriteLine("Test: BacklogMessagesPending check"); + Assert.Equal(0, stats.BacklogMessagesPending); + Writer.WriteLine("Test: Awaiting lastPing 2"); + await lastPing; + Writer.WriteLine("Test: Done"); + } + finally + { + ClearAmbientFailures(); + } + } } }