Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
push:
branches: [master, v9, 'v9.*']
pull_request:
branches: [master, v9, v9.7, v9.8, 'ndyakov/*', 'ofekshenawa/*', 'htemelski-redis/*', 'ce/*']
branches: [master, v9, v9.7, v9.8, 'ndyakov/**', 'ofekshenawa/**', 'ce/**']

permissions:
contents: read
Expand Down
63 changes: 63 additions & 0 deletions .github/workflows/test-e2e.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# E2E test workflow: starts the Docker-based mock proxy stack (compose profile
# `e2e`) and runs the Go tests under ./maintnotifications/e2e/ against it.
name: E2E Tests

on:
  push:
    branches: [master, v9, 'v9.*']
  pull_request:
    branches: [master, v9, v9.7, v9.8, 'ndyakov/**', 'ofekshenawa/**', 'ce/**']

permissions:
  contents: read

jobs:
  test-e2e-mock:
    name: E2E Tests (Mock Proxy)
    runs-on: ubuntu-latest
    strategy:
      # Let every Go version finish even if another matrix entry fails.
      fail-fast: false
      matrix:
        go-version:
          - "1.23.x"
          - stable

    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Go ${{ matrix.go-version }}
        uses: actions/setup-go@v6
        with:
          go-version: ${{ matrix.go-version }}

      - name: Start Docker services for E2E tests
        run: make docker.e2e.start

      # Poll each service for up to 30 seconds; `timeout` fails the step if a
      # service never answers.
      - name: Wait for services to be ready
        run: |
          echo "Waiting for Redis to be ready..."
          timeout 30 bash -c 'until docker exec redis-standalone redis-cli ping 2>/dev/null; do sleep 1; done'
          echo "Waiting for cae-resp-proxy to be ready..."
          timeout 30 bash -c 'until curl -s http://localhost:18100/stats > /dev/null; do sleep 1; done'
          echo "All services are ready!"

      - name: Run E2E tests with mock proxy
        env:
          E2E_SCENARIO_TESTS: "true"
        run: |
          go test -v ./maintnotifications/e2e/ -timeout 30m -race
        continue-on-error: false

      # Must run BEFORE the teardown step: `make docker.e2e.stop` runs
      # `docker compose --profile e2e down`, which removes the containers, and
      # `docker logs` cannot read logs of a container that no longer exists.
      - name: Show Docker logs on failure
        if: failure()
        run: |
          echo "=== Redis logs ==="
          docker logs redis-standalone 2>&1 | tail -100
          echo "=== cae-resp-proxy logs ==="
          docker logs cae-resp-proxy 2>&1 | tail -100
          echo "=== proxy-fault-injector logs ==="
          docker logs proxy-fault-injector 2>&1 | tail -100

      # Always tear the stack down, whether the earlier steps passed or failed.
      - name: Stop Docker services
        if: always()
        run: make docker.e2e.stop

37 changes: 36 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,17 @@ docker.start:
docker.stop:
docker compose --profile all down

# Bring up the compose services in the `e2e` profile in the background, then
# pause for a fixed 3 seconds. The pause is a grace period only; no readiness
# probe is performed here.
docker.e2e.start:
	@echo "Starting Redis and cae-resp-proxy for E2E tests..."
	docker compose --profile e2e up -d --quiet-pull
	@echo "Waiting for services to be ready..." && sleep 3 && echo "Services ready!"

# Tear down the compose services that belong to the `e2e` profile.
docker.e2e.stop:
	@echo "Stopping E2E services..."
	docker compose --profile e2e down

test:
$(MAKE) docker.start
@if [ -z "$(REDIS_VERSION)" ]; then \
Expand Down Expand Up @@ -66,7 +77,31 @@ bench:
export REDIS_VERSION=$(REDIS_VERSION) && \
go test ./... -test.run=NONE -test.bench=. -test.benchmem -skip Example

.PHONY: all test test.ci test.ci.skip-vectorsets bench fmt
# Run the full E2E suite against the local Docker stack: start the services,
# run the Go tests with E2E_SCENARIO_TESTS=true, and stop the services again.
# When the tests fail the services are still stopped before the recipe exits
# with a non-zero status.
test.e2e:
	@echo "Running E2E tests with auto-start proxy..."
	$(MAKE) docker.e2e.start
	@echo "Running tests..."
	@E2E_SCENARIO_TESTS=true go test -v ./maintnotifications/e2e/ -timeout 30m || { $(MAKE) docker.e2e.stop && exit 1; }
	$(MAKE) docker.e2e.stop
	@echo "E2E tests completed!"

# Run only the tests selected by the -run pattern below against the local
# Docker stack, with a 10 minute timeout. The services are stopped afterwards
# on both the success and the failure path.
test.e2e.docker:
	@echo "Running Docker-compatible E2E tests..."
	$(MAKE) docker.e2e.start
	@echo "Running unified injector tests..."
	@E2E_SCENARIO_TESTS=true go test -v -run "TestUnifiedInjector|TestCreateTestFaultInjectorLogic|TestFaultInjectorClientCreation" ./maintnotifications/e2e/ -timeout 10m || { $(MAKE) docker.e2e.stop && exit 1; }
	$(MAKE) docker.e2e.stop
	@echo "Docker E2E tests completed!"

# Run the two logic-only tests named in the -run pattern. This target does not
# start any Docker services; it only sets the environment variables below for
# the `go test` process.
test.e2e.logic:
	@echo "Running E2E logic tests (no proxy required)..."
	@E2E_SCENARIO_TESTS=true \
	REDIS_ENDPOINTS_CONFIG_PATH=/tmp/test_endpoints_verify.json \
	FAULT_INJECTION_API_URL=http://localhost:8080 \
	go test -v -run "TestCreateTestFaultInjectorLogic|TestFaultInjectorClientCreation" ./maintnotifications/e2e/
	@echo "Logic tests completed!"

# Command-style targets that do not produce a file of the same name. Every
# E2E target is listed, including test.e2e.docker, so that a stray file with a
# matching name can never make one of them look up to date.
.PHONY: all test test.ci test.ci.skip-vectorsets bench fmt test.e2e test.e2e.docker test.e2e.logic docker.e2e.start docker.e2e.stop

build:
export RE_CLUSTER=$(RE_CLUSTER) && \
Expand Down
20 changes: 15 additions & 5 deletions commands_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8922,27 +8922,37 @@ var _ = Describe("Commands", func() {
const key = "latency-monitor-threshold"

old := client.ConfigGet(ctx, key).Val()
client.ConfigSet(ctx, key, "1")
// Use a higher threshold (100ms) to avoid capturing normal operations
// that could cause flakiness due to timing variations
client.ConfigSet(ctx, key, "100")
defer client.ConfigSet(ctx, key, old[key])

result, err := client.Latency(ctx).Result()
Expect(err).NotTo(HaveOccurred())
Expect(len(result)).Should(Equal(0))

err = client.Do(ctx, "DEBUG", "SLEEP", 0.01).Err()
// Use a longer sleep (150ms) to ensure it exceeds the 100ms threshold
err = client.Do(ctx, "DEBUG", "SLEEP", 0.15).Err()
Expect(err).NotTo(HaveOccurred())

result, err = client.Latency(ctx).Result()
Expect(err).NotTo(HaveOccurred())
Expect(len(result)).Should(Equal(1))
Expect(len(result)).Should(BeNumerically(">=", 1))

// reset latency by event name
err = client.LatencyReset(ctx, result[0].Name).Err()
eventName := result[0].Name
err = client.LatencyReset(ctx, eventName).Err()
Expect(err).NotTo(HaveOccurred())

// Verify the specific event was reset (not that all events are gone)
// This avoids flakiness from other operations triggering latency events
result, err = client.Latency(ctx).Result()
Expect(err).NotTo(HaveOccurred())
Expect(len(result)).Should(Equal(0))
for _, event := range result {
if event.Name == eventName {
Fail("Event " + eventName + " should have been reset")
}
}
})
})
})
Expand Down
38 changes: 38 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ services:
- sentinel
- all-stack
- all
- e2e

osscluster:
image: ${CLIENT_LIBS_TEST_IMAGE:-redislabs/client-libs-test:8.4.0}
Expand All @@ -39,6 +40,43 @@ services:
- all-stack
- all

# RESP proxy used by the E2E tests. It forwards to the `redis` service and
# listens on four proxy ports plus an HTTP API port.
cae-resp-proxy:
  image: redislabs/client-resp-proxy:latest
  container_name: cae-resp-proxy
  profiles:
    - e2e
    - all
  depends_on:
    - redis
  environment:
    TARGET_HOST: "redis"
    TARGET_PORT: "6379"
    # 4 proxy nodes: initially show 3, swap in 4th during SMIGRATED
    LISTEN_PORT: "17000,17001,17002,17003"
    LISTEN_HOST: "0.0.0.0"
    API_PORT: "3000"
    DEFAULT_INTERCEPTORS: "cluster,hitless"
  ports:
    # All mappings are host:container.
    - "17000:17000" # Proxy node 1
    - "17001:17001" # Proxy node 2
    - "17002:17002" # Proxy node 3
    - "17003:17003" # Proxy node 4 - hidden initially, swapped in during SMIGRATED
    - "18100:3000" # HTTP API port

# Fault injector server built from this repository. It is pointed at the
# proxy's HTTP API through PROXY_API_URL.
proxy-fault-injector:
  container_name: proxy-fault-injector
  build:
    context: .
    dockerfile: maintnotifications/e2e/cmd/proxy-fi-server/Dockerfile
  profiles:
    - e2e
    - all
  depends_on:
    - cae-resp-proxy
  environment:
    PROXY_API_URL: "http://cae-resp-proxy:3000"
  ports:
    - "15000:5000" # Fault injector API port (host:container)

sentinel-cluster:
image: ${CLIENT_LIBS_TEST_IMAGE:-redislabs/client-libs-test:8.4.0}
platform: linux/amd64
Expand Down
45 changes: 45 additions & 0 deletions internal/maintnotifications/logs/log_messages.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ const (
UnrelaxedTimeoutMessage = "clearing relaxed timeout"
ManagerNotInitializedMessage = "manager not initialized"
FailedToMarkForHandoffMessage = "failed to mark connection for handoff"
InvalidSeqIDInSMigratingNotificationMessage = "invalid SeqID in SMIGRATING notification"
InvalidSeqIDInSMigratedNotificationMessage = "invalid SeqID in SMIGRATED notification"
InvalidHostPortInSMigratedNotificationMessage = "invalid host:port in SMIGRATED notification"
SlotMigratingMessage = "slots migrating, applying relaxed timeout"
SlotMigratedMessage = "slots migrated, triggering cluster state reload"

// ========================================
// used in pool/conn
Expand Down Expand Up @@ -633,3 +638,43 @@ func ExtractDataFromLogMessage(logMessage string) map[string]interface{} {
// If JSON parsing fails, return empty map
return result
}

// Cluster notification functions

// InvalidSeqIDInSMigratingNotification formats the log message for an
// SMIGRATING notification with an invalid SeqID. The offending value is
// rendered with %v both in the message text and in the "seqID" field handed to
// appendJSONIfDebug.
func InvalidSeqIDInSMigratingNotification(seqID interface{}) string {
	text := fmt.Sprintf("%s: %v", InvalidSeqIDInSMigratingNotificationMessage, seqID)
	fields := map[string]interface{}{"seqID": fmt.Sprint(seqID)}
	return appendJSONIfDebug(text, fields)
}

// InvalidSeqIDInSMigratedNotification formats the log message for an
// SMIGRATED notification with an invalid SeqID. The offending value is
// rendered with %v both in the message text and in the "seqID" field handed to
// appendJSONIfDebug.
func InvalidSeqIDInSMigratedNotification(seqID interface{}) string {
	text := fmt.Sprintf("%s: %v", InvalidSeqIDInSMigratedNotificationMessage, seqID)
	fields := map[string]interface{}{"seqID": fmt.Sprint(seqID)}
	return appendJSONIfDebug(text, fields)
}

// InvalidHostPortInSMigratedNotification formats the log message for an
// SMIGRATED notification with an invalid host:port. The offending value is
// rendered with %v both in the message text and in the "hostPort" field handed
// to appendJSONIfDebug.
func InvalidHostPortInSMigratedNotification(hostPort interface{}) string {
	text := fmt.Sprintf("%s: %v", InvalidHostPortInSMigratedNotificationMessage, hostPort)
	fields := map[string]interface{}{"hostPort": fmt.Sprint(hostPort)}
	return appendJSONIfDebug(text, fields)
}

// SlotMigrating formats the log message emitted when slots are migrating and
// a relaxed timeout is applied. The message names the connection ID, the
// notification SeqID and the slot ranges; the same three values are handed to
// appendJSONIfDebug as the fields "connID", "seqID" and "slotRanges".
func SlotMigrating(connID uint64, seqID int64, slotRanges []string) string {
	text := fmt.Sprintf(
		"conn[%d] %s seqID=%d slots=%v",
		connID, SlotMigratingMessage, seqID, slotRanges,
	)

	fields := make(map[string]interface{}, 3)
	fields["connID"] = connID
	fields["seqID"] = seqID
	fields["slotRanges"] = slotRanges

	return appendJSONIfDebug(text, fields)
}

// SlotMigrated formats the log message emitted when slots have migrated and a
// cluster state reload is triggered. The message names the notification
// SeqID, the host:port and the slot ranges; the same three values are handed
// to appendJSONIfDebug as the fields "seqID", "hostPort" and "slotRanges".
func SlotMigrated(seqID int64, hostPort string, slotRanges []string) string {
	text := fmt.Sprintf(
		"%s seqID=%d host:port=%s slots=%v",
		SlotMigratedMessage, seqID, hostPort, slotRanges,
	)

	fields := make(map[string]interface{}, 3)
	fields["seqID"] = seqID
	fields["hostPort"] = hostPort
	fields["slotRanges"] = slotRanges

	return appendJSONIfDebug(text, fields)
}
10 changes: 8 additions & 2 deletions maintnotifications/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@

Seamless Redis connection handoffs during cluster maintenance operations without dropping connections.

## ⚠️ **Important Note**
**Maintenance notifications are currently supported only in standalone Redis clients.** Cluster clients (ClusterClient, FailoverClient, etc.) do not yet support this functionality.
## Cluster Support

**Cluster notifications are now supported for ClusterClient!**

- **SMIGRATING**: `["SMIGRATING", SeqID, slot/range, ...]` - Relaxes timeouts when slots are being migrated
- **SMIGRATED**: `["SMIGRATED", SeqID, host:port, slot/range, ...]` - Reloads cluster state when slot migration completes

**Note:** Other maintenance notifications (MOVING, MIGRATING, MIGRATED, FAILING_OVER, FAILED_OVER) are supported only in standalone Redis clients. Cluster clients support SMIGRATING and SMIGRATED for cluster-specific slot migration handling.

## Quick Start

Expand Down
52 changes: 46 additions & 6 deletions maintnotifications/e2e/README_SCENARIOS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,36 @@ This directory contains comprehensive end-to-end test scenarios for Redis push n

## Introduction

To run those tests you would need a fault injector service, please review the client and feel free to implement your
fault injector of choice. Those tests are tailored for Redis Enterprise, but can be adapted to other Redis distributions where
a fault injector is available.
These tests support two modes:

Once you have fault injector service up and running, you can execute the tests by running the `run-e2e-tests.sh` script.
there are three environment variables that need to be set before running the tests:
### 1. Mock Proxy Mode (Default)
Uses a local Docker-based proxy ([cae-resp-proxy](https://github.com/redis-developer/cae-resp-proxy)) to simulate Redis Enterprise behavior. This mode:
- Runs entirely locally without external dependencies
- Provides fast feedback for development
- Simulates cluster topology changes
- Supports SMIGRATING and SMIGRATED notifications

To run in mock proxy mode:
```bash
make test.e2e
```

### 2. Real Fault Injector Mode
Uses a real Redis Enterprise fault injector service for comprehensive testing. This mode:
- Tests against actual Redis Enterprise clusters
- Validates real-world scenarios
- Requires external fault injector setup

To run with a real fault injector, set these environment variables:
- `REDIS_ENDPOINTS_CONFIG_PATH`: Path to Redis endpoints configuration
- `FAULT_INJECTION_API_URL`: URL of the fault injector server
- `E2E_SCENARIO_TESTS`: Set to `true` to enable scenario tests

Then run:
```bash
./scripts/run-e2e-tests.sh
```

## Test Scenarios Overview

### 1. Basic Push Notifications (`scenario_push_notifications_test.go`)
Expand All @@ -44,7 +63,28 @@ there are three environment variables that need to be set before running the tes
- Notification delivery consistency
- Handoff behavior per endpoint type

### 3. Database Management Scenario (`scenario_database_management_test.go`)
### 3. Unified Injector Scenarios (`scenario_unified_injector_test.go`)
**Mock proxy-based notification testing**
- **Purpose**: Test SMIGRATING and SMIGRATED notifications with simulated cluster topology changes
- **Features Tested**:
- SMIGRATING notifications (slot migration in progress)
- SMIGRATED notifications (slot migration completed)
- Cluster topology changes (node swap simulation)
- Complex multi-step migration scenarios
- **Configuration**: Uses local Docker proxy (cae-resp-proxy) with 4 nodes
- **Duration**: ~10 seconds
- **Key Validations**:
- Notification delivery and parsing
- Cluster state reload callbacks
- Client resilience during migrations
- Topology change handling
- **Topology Simulation**:
- Starts with 4 proxy nodes (17000-17003)
- Initially exposes 3 nodes in CLUSTER SLOTS (17000, 17001, 17002)
- On SMIGRATED, swaps out node 2 (17002) for node 3 (17003), counting nodes from 0 (simulates node replacement)
- Verifies client continues to function after topology change

### 4. Database Management Scenario (`scenario_database_management_test.go`)
**Dynamic database creation and deletion**
- **Purpose**: Test database lifecycle management via fault injector
- **Features Tested**: CREATE_DATABASE, DELETE_DATABASE endpoints
Expand Down
Loading
Loading