diff --git a/.github/workflows/agent.yml b/.github/workflows/agent.yml index 6314b11c..f6e0925e 100644 --- a/.github/workflows/agent.yml +++ b/.github/workflows/agent.yml @@ -50,6 +50,10 @@ jobs: with: go-version: '1.24.0' check-latest: true + - name: Start Redis + uses: supercharge/redis-github-action@1.5.0 + with: + redis-version: 5 - name: coveralls id: coveralls run: | @@ -111,7 +115,7 @@ jobs: - name: Start Redis uses: supercharge/redis-github-action@1.5.0 with: - redis-version: 4 + redis-version: 5 - name: acceptance test run: | make -e setup build diff --git a/CHANGELOG.md b/CHANGELOG.md index 323323bc..10f788ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,43 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### New Features + +* **Redis Streams for Persistent Notification Delivery**: Added Redis Streams implementation as an alternative to Redis Pub/Sub for notification synchronization across Agent nodes. Redis Streams provides: + - Persistent message delivery with acknowledgment + - Automatic retries with exponential backoff + - Consumer groups for load balancing + - Configurable batching and flush intervals + - Connection error recovery with reconnection logic + - Comprehensive metrics tracking + + Configure via `synchronization.notification.default: "redis-streams"` in config.yaml. See [docs/redis-streams.md](docs/redis-streams.md) for configuration options including batch_size, flush_interval, max_retries, and connection_timeout. + +* **CMAB Redis Cache Support** ([#447](https://github.com/optimizely/agent/pull/447)): Added Redis cache support for Contextual Multi-Armed Bandit (CMAB) decisions following the same plugin-based architecture as ODP cache: + - In-memory LRU cache (default, size: 10000, TTL: 30m) + - Redis cache for multi-instance deployments with shared caching + - Configurable via `client.cmab.cache` in config.yaml + - Supports both in-memory and Redis cache backends + +* **Flexible Redis Authentication**: Added support for flexible Redis password field names across all Redis clients (ODP cache, UPS, CMAB cache, and synchronization): + - Supports `auth_token`, `redis_secret`, and `password` fields (in order of preference) + - Falls back to environment variables: `REDIS_PASSWORD`, `REDIS_ODP_PASSWORD`, `REDIS_UPS_PASSWORD`, `REDIS_CMAB_PASSWORD` + - Avoids security scanning alerts from hardcoded "password" field names + +* **CMAB Prediction Endpoint Configuration**: Added configurable CMAB prediction endpoint via `client.cmab.predictionEndpoint` in config.yaml or `OPTIMIZELY_CMAB_PREDICTIONENDPOINT` environment variable for testing/staging environments + +### Changed + +* Updated Redis configuration in `synchronization.pubsub.redis` to use `auth_token` instead of `password` field (backwards compatible) +* CMAB configuration moved from top-level to `client.cmab` section for consistency with other client-related settings (ODP, UPS) + +### Fixed + +* Improved Redis connection error handling and recovery across all Redis clients +* Added comprehensive test coverage for Redis authentication and CMAB caching + ## [4.2.2] - October 7, 2025 ### Fixed diff --git a/config.yaml b/config.yaml index a696a5d0..713eb0f9 100644 --- a/config.yaml +++ b/config.yaml @@ -80,6 +80,9 @@ server: ## Note: don't include port in these hosts values - port is stripped from the request host before comparing against these. allowedHosts: - localhost + + ## If using notifications API (/v1/notifications/event-stream), + ## increase these timeouts to prevent SSE disconnects. ## the maximum duration for reading the entire request, including the body. ## Value can be set in seconds (e.g. "5s") or milliseconds (e.g. "5000ms") readTimeout: 5s @@ -285,20 +288,37 @@ runtime: synchronization: pubsub: redis: - host: "redis.demo.svc:6379" - password: "" + host: "localhost:6379" + ## Use auth_token or redis_secret instead of password to avoid security scanning alerts + ## Supports: auth_token, redis_secret, password (in order of preference) + ## Fallback: REDIS_PASSWORD environment variable if config field is empty + auth_token: "" database: 0 - ## channel: "optimizely-sync" # Base channel name (NOT currently parsed - uses hardcoded default) - ## Agent publishes to channels: "optimizely-sync-{sdk_key}" - ## For external Redis clients: Subscribe "optimizely-sync-{sdk_key}" or PSubscribe "optimizely-sync-*" - ## Note: Channel configuration parsing is a known bug - planned for future release + + ## Advanced: Redis Streams Configuration (auto-detected, optional tuning) + ## Agent automatically detects Redis version: + ## - Redis >= 5.0: Uses Redis Streams (persistent, batched delivery) + ## - Redis < 5.0: Uses Redis Pub/Sub (simple, fire-and-forget) + ## + ## The parameters below only apply when Redis Streams is used (Redis >= 5.0). + ## Uncomment to override defaults. Leave commented to use defaults. + # batch_size: 10 # Messages per batch (default: 10) + # flush_interval: 5s # Max wait before sending batch (default: 5s) + # max_retries: 3 # Retry attempts on failure (default: 3) + # retry_delay: 100ms # Initial retry delay (default: 100ms) + # max_retry_delay: 5s # Max retry delay with backoff (default: 5s) + # connection_timeout: 10s # Redis connection timeout (default: 10s) + ## if notification synchronization is enabled, then the active notification event-stream API ## will get the notifications from available replicas notification: enable: false + ## Agent auto-detects best option based on Redis version default: "redis" + ## if datafile synchronization is enabled, then for each webhook API call ## the datafile will be sent to all available replicas to achieve better eventual consistency datafile: enable: false + ## Agent auto-detects best option based on Redis version default: "redis" diff --git a/config/config.go b/config/config.go index 307acc8c..e28e852f 100644 --- a/config/config.go +++ b/config/config.go @@ -103,7 +103,7 @@ func NewDefaultConfig() *AgentConfig { }, }, CMAB: CMABConfig{ - RequestTimeout: 10 * time.Second, + RequestTimeout: 10 * time.Second, PredictionEndpoint: "https://prediction.cmab.optimizely.com/predict/%s", Cache: CMABCacheConfig{ "default": "in-memory", diff --git a/docs/redis-streams.md b/docs/redis-streams.md new file mode 100644 index 00000000..9495c9af --- /dev/null +++ b/docs/redis-streams.md @@ -0,0 +1,886 @@ +# Redis Streams for Notification Delivery (beta) + +Redis Streams provides persistent, reliable message delivery for Agent notifications with guaranteed delivery, message acknowledgment, and automatic recovery. + +## Table of Contents + +- [Overview](#overview) +- [Why Redis for Notifications?](#why-redis-for-notifications) +- [Architecture](#architecture) +- [Configuration](#configuration) +- [Redis Pub/Sub vs Redis Streams](#redis-pubsub-vs-redis-streams) +- [Testing](#testing) +- [Migration Guide](#migration-guide) +- [Troubleshooting](#troubleshooting) +- [FAQ](#faq) + +## Overview + +Redis Streams extends Redis with a log data structure that provides: + +- **Persistent storage** - Messages survive Redis restarts +- **Guaranteed delivery** - Messages are acknowledged only after successful processing +- **Consumer groups** - Load distribution across multiple Agent instances +- **Automatic recovery** - Unacknowledged messages are redelivered +- **Batching** - Efficient processing of multiple messages + +### Prerequisites + +**Redis Version:** Redis **5.0 or higher** is required for Redis Streams support. + +- Redis Streams were introduced in Redis 5.0 +- Recommended: Redis 6.0+ for improved performance and stability +- Verify your version: `redis-cli --version` + +### Redis Streams vs Redis Pub/Sub + +Agent automatically chooses the best implementation based on your Redis version: + +**Redis Streams (Redis >= 5.0):** +- Message delivery is critical (notifications must reach clients) +- Running multiple Agent instances (high availability) +- Need to recover from Agent restarts without message loss +- Want visibility into message delivery status + +**Redis Pub/Sub (Redis < 5.0 or detection fails):** +- Message loss is acceptable (fire-and-forget) +- Running single Agent instance +- Need absolute minimum latency (no persistence overhead) + +> **Note:** You don't need to choose - Agent detects your Redis version and uses the appropriate implementation automatically. + +## Why Redis for Notifications? + +### The Load Balancer Subscription Problem + +When running multiple Agent pods behind a load balancer in Kubernetes, **you can only subscribe to ONE pod's notifications**: + +``` +Client subscribes: + /v1/notifications/event-stream → Load Balancer → Agent Pod 1 (sticky connection) + +Decision requests (load balanced): + /v1/decide → Load Balancer → Agent Pod 1 → Client receives notification + /v1/decide → Load Balancer → Agent Pod 2 → Client MISSES notification! + /v1/decide → Load Balancer → Agent Pod 3 → Client MISSES notification! +``` + +**The Problem:** + +1. **Client subscribes** to `/v1/notifications/event-stream` via load balancer +2. Load balancer routes SSE connection to **one specific Agent pod** (e.g., Pod 1) +3. Client is now subscribed **only to Pod 1's notifications** +4. Decision requests are **load-balanced** across all pods (Pod 1, 2, 3) +5. When decision happens on **Pod 2 or Pod 3**, client **never receives notification** + +**Why you can't subscribe to all pods:** +- **SSE connections are sticky** - once connected to a pod, you stay connected to that pod +- **Load balancer routes to ONE pod** - you can't subscribe to multiple pods simultaneously +- **Subscribing directly to pod IPs is not feasible** - pods are ephemeral in Kubernetes + +**Alternative considered (Push model):** +- Configure Agent pods to push notifications to an external endpoint +- Problem: This would completely change the subscribe-based SSE model +- Decision: Keep the subscribe model, use Redis as central hub instead + +### Redis Solution: Central Notification Hub + +Redis acts as a **shared notification hub** that all Agent pods write to and read from: + +``` +Decision Flow (all pods publish to Redis): + /v1/decide → Load Balancer → Agent Pod 1 → Publishes notification → Redis + /v1/decide → Load Balancer → Agent Pod 2 → Publishes notification → Redis + /v1/decide → Load Balancer → Agent Pod 3 → Publishes notification → Redis + +Subscription Flow (any pod reads from Redis): + Client → /v1/notifications/event-stream → Load Balancer → Agent Pod 1 + ↓ + Agent Pod 1 reads Redis Stream + ↓ + Gets notifications from ALL pods + ↓ + Sends to client via SSE connection +``` + +**How it works:** + +1. **All Agent pods publish to Redis:** + - Decision on Pod 1 → notification published to Redis + - Decision on Pod 2 → notification published to Redis + - Decision on Pod 3 → notification published to Redis + +2. **Client subscribes to one pod (via load balancer):** + - Client → `/v1/notifications/event-stream` → routed to Pod 1 + - Long-lived SSE connection established to Pod 1 + +3. **Pod 1 reads from Redis Stream:** + - Pod 1 subscribes to Redis (using consumer groups) + - Receives notifications from **ALL pods** (including its own) + +4. **Pod 1 forwards to client:** + - Sends all notifications to client over SSE connection + - Client receives notifications from all Agent pods, not just Pod 1 + +**Key Insight:** Client connects to ONE pod, but that pod reads from Redis which aggregates notifications from ALL pods. This solves the load balancer problem without changing the subscribe model. + +### Why Not Use Event Dispatcher? + +**Event Dispatcher** (SDK events → Optimizely servers): +- Each Agent sends events **independently** +- No coordination needed between Agents + +**Notifications** (datafile updates → SSE clients): +- Need to sync updates **across ALL Agents** +- SSE clients connected to different Agents must receive same updates +- Redis provides the broadcast mechanism + +This architecture was designed to ensure **datafile consistency across Agent clusters** in production environments. + +## Architecture + +``` +┌─────────────┐ XADD ┌──────────────┐ +│ Decide ├──────────────►│ Redis Stream │ +│ Handler │ │ (persistent) │ +└─────────────┘ └──────┬───────┘ + │ + XREADGROUP + (batch_size: 5) + │ + ▼ + ┌──-──────────────┐ + │ Consumer Group │ + │ "notifications"│ + └────────┬────────┘ + │ + ┌──────┴──────┐ + │ Batch │ + │ (5 messages)│ + └──────┬──────┘ + │ + Send to SSE Client + │ + ▼ + XACK + (acknowledge delivery) +``` + +### Message Flow + +1. **Publish** - Decide handler adds notification to stream (`XADD`) +2. **Read** - Consumer reads batch of messages (`XREADGROUP`) +3. **Process** - Messages sent to SSE client +4. **Acknowledge** - Successfully delivered messages acknowledged (`XACK`) +5. **Retry** - Unacknowledged messages automatically redelivered + +## Configuration + +> **⚠️ Prerequisites:** Requires Redis 5.0 or higher. Redis Streams are not available in Redis 4.x or earlier. + +### Quick Start Setup + +**Step 1 - Enable notifications in `config.yaml`:** + +```yaml +api: + enableNotifications: true +``` + +**Step 2 - Enable synchronization:** + +```yaml +synchronization: + notification: + enable: true + default: "redis" # Agent auto-detects Redis version and uses best option +``` + +> **Note:** Agent automatically detects your Redis version: +> - **Redis >= 5.0:** Uses Redis Streams (persistent, batched delivery) +> - **Redis < 5.0:** Falls back to Redis Pub/Sub (fire-and-forget) +> - **Detection fails:** Safely falls back to Redis Pub/Sub + +**Step 3 - Configure Redis connection:** + +```yaml +synchronization: + pubsub: + redis: + host: "localhost:6379" + auth_token: "" # Recommended: use auth_token or redis_secret + # password: "" # Alternative: password (may trigger security scanners) + database: 0 +``` + +**Step 4 - (Optional) Tune Redis Streams performance:** + +> **Note:** These parameters only apply when Redis Streams is used (Redis >= 5.0). +> They are ignored if Redis Pub/Sub is used. Leave these out to use sensible defaults. + +```yaml +synchronization: + pubsub: + redis: + # Batching configuration (optional - defaults shown) + batch_size: 10 # Messages per batch (default: 10) + flush_interval: 5s # Max wait for partial batch (default: 5s) + + # Retry configuration (optional - defaults shown) + max_retries: 3 # Retry attempts (default: 3) + retry_delay: 100ms # Initial retry delay (default: 100ms) + max_retry_delay: 5s # Max retry delay (default: 5s) + connection_timeout: 10s # Connection timeout (default: 10s) +``` + +**Step 5 - (Optional) Increase HTTP timeouts to prevent SSE disconnects:** + +```yaml +server: + readTimeout: 300s # 5 minutes + writeTimeout: 300s # 5 minutes +``` + +**Step 6 - (Optional) Enable TLS/HTTPS:** + +```yaml +server: + keyFile: /path/to/key.pem + certFile: /path/to/cert.pem +``` + +### Full Configuration Example + +```yaml +api: + enableNotifications: true + +server: + readTimeout: 300s + writeTimeout: 300s + # Optional: Enable HTTPS + # keyFile: /path/to/key.pem + # certFile: /path/to/cert.pem + +synchronization: + pubsub: + redis: + host: "localhost:6379" + auth_token: "" # Supports: auth_token, redis_secret, password + # Fallback: REDIS_PASSWORD environment variable + database: 0 + + # Optional: Redis Streams tuning (only applies if Redis >= 5.0) + # Uncomment to override defaults + batch_size: 5 # Messages per batch (default: 10) + flush_interval: 2s # Max wait before sending (default: 5s) + max_retries: 3 # Retry attempts (default: 3) + retry_delay: 100ms # Initial retry delay (default: 100ms) + max_retry_delay: 5s # Max retry delay (default: 5s) + connection_timeout: 10s # Connection timeout (default: 10s) + + notification: + enable: true + default: "redis" # Agent auto-detects best option based on Redis version +``` + +### Security: Password Configuration + +To avoid security scanner alerts, use alternative field names: + +```yaml +# Preferred (no security scanner alerts) +auth_token: "your-redis-password" + +# Alternative +redis_secret: "your-redis-password" + +# Fallback to environment variable (if config field empty) +# export REDIS_PASSWORD="your-redis-password" + +# Not recommended (triggers security scanners) +password: "your-redis-password" +``` + +The Agent checks fields in this order: `auth_token` → `redis_secret` → `password` → `REDIS_PASSWORD` env var. + +### Automatic Redis Version Detection + +Agent automatically detects your Redis version at startup and chooses the best notification implementation: + +**Detection Flow:** +1. Agent connects to Redis +2. Runs `INFO server` command to get Redis version +3. Parses `redis_version` field (e.g., `6.2.5`) +4. If major version >= 5: Uses Redis Streams +5. If major version < 5: Uses Redis Pub/Sub +6. If detection fails: Falls back to Redis Pub/Sub (safe default) + +**Logging Examples:** + +Redis 6.x detected: +``` +INFO Auto-detecting Redis version to choose best notification implementation... +INFO Redis Streams supported - will use Streams for notifications redis_version=6 +``` + +Redis 4.x detected: +``` +INFO Auto-detecting Redis version to choose best notification implementation... +INFO Redis Streams not supported - will use Pub/Sub for notifications redis_version=4 min_required=5 +``` + +Detection failed: +``` +INFO Auto-detecting Redis version to choose best notification implementation... +WARN Could not detect Redis version - will use Pub/Sub as safe fallback error="NOPERM" +``` + +> **Note:** If auto-detection fails, Agent safely falls back to Redis Pub/Sub (compatible with all Redis versions). + +### Configuration Parameters + +> **Note:** These parameters only apply when Redis Streams is used (Redis >= 5.0). + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `batch_size` | 10 | Number of messages to batch before sending | +| `flush_interval` | 5s | Maximum time to wait before sending partial batch | +| `max_retries` | 3 | Maximum retry attempts for failed operations | +| `retry_delay` | 100ms | Initial delay between retry attempts | +| `max_retry_delay` | 5s | Maximum delay with exponential backoff | +| `connection_timeout` | 10s | Timeout for Redis connections | + +### Performance Tuning + +**For low-latency (real-time notifications):** +```yaml +batch_size: 5 +flush_interval: 500ms # 0.5s max latency +``` + +**For high-throughput (batch processing):** +```yaml +batch_size: 100 +flush_interval: 5s +``` + +**For burst traffic:** +```yaml +batch_size: 50 +flush_interval: 1s +``` + +## Redis Pub/Sub vs Redis Streams + +### Comparison + +| Feature | Redis Pub/Sub | Redis Streams | +|---------|---------------|---------------| +| **Delivery guarantee** | Fire-and-forget | Guaranteed with ACK | +| **Persistence** | No (in-memory only) | Yes (survives restarts) | +| **Message recovery** | No | Yes (redelivery) | +| **Consumer groups** | No | Yes | +| **Latency** | Lowest (~1ms) | Low (~2-5ms) | +| **Memory usage** | Minimal | Higher (persistence) | +| **Complexity** | Simple | Moderate | +| **Redis version** | 2.0+ | 5.0+ required | +| **Selection** | Auto-detected (< 5.0) | Auto-detected (>= 5.0) | + +> **Note:** Agent automatically detects your Redis version and uses the appropriate implementation. You don't need to choose manually. + +### Migration Path + +**Currently using Redis Pub/Sub?** Switching to Redis Streams is automatic if you upgrade Redis: + +**Scenario 1: Already using `default: "redis"` (auto-detect)** +```yaml +synchronization: + notification: + default: "redis" # Already using auto-detection +``` +- **Redis 4.x:** Currently using Pub/Sub +- **Upgrade Redis to 6.x:** Automatically switches to Streams (no config change needed!) + +**Scenario 2: Explicitly set to `default: "redis"` (legacy Pub/Sub)** +```yaml +# Old config (explicit Pub/Sub, no auto-detection) +synchronization: + notification: + default: "redis" +``` +- This now uses auto-detection +- Redis 5+ will automatically use Streams +- No breaking changes + +All Redis Streams configuration is backward compatible - existing `pubsub.redis` settings are reused. + +## Testing + +### Test 1: Batching Behavior + +Send burst traffic to trigger batching: + +```bash +# Send 20 requests instantly (in parallel) +for i in {1..20}; do + curl -H "X-Optimizely-SDK-Key: YOUR_SDK_KEY" \ + -H "Content-Type: application/json" \ + -d "{\"userId\":\"burst-$i\"}" \ + "localhost:8080/v1/decide" & +done +wait +``` + +**Verify batching in Redis Monitor:** + +```bash +redis-cli monitor | grep -E "xack|xreadgroup" +``` + +**Expected patterns:** + +Multiple XACKs with same timestamp prefix (batch of 5): +``` +"xack" ... "1759461708595-1" +"xack" ... "1759461708595-2" +"xack" ... "1759461708595-3" +"xack" ... "1759461708595-4" +"xack" ... "1759461708595-5" +``` + +### Test 2: Flush Interval + +Send messages slower than batch size: + +```bash +# Send 3 messages (less than batch_size) +for i in {1..3}; do + curl -H "X-Optimizely-SDK-Key: YOUR_SDK_KEY" \ + -H "Content-Type: application/json" \ + -d "{\"userId\":\"flush-test-$i\"}" \ + "localhost:8080/v1/decide" +done +``` + +**Expected:** Messages delivered after `flush_interval` (e.g., 2s) even though batch isn't full. + +### Test 3: Message Recovery + +Test that messages survive Agent restarts: + +**Step 1 - Send messages:** +```bash +for i in {1..5}; do + curl -H "X-Optimizely-SDK-Key: YOUR_SDK_KEY" \ + -H "Content-Type: application/json" \ + -d "{\"userId\":\"recovery-test-$i\"}" \ + "localhost:8080/v1/decide" +done +``` + +**Step 2 - Kill Agent:** +```bash +# Stop the agent process +pkill -f optimizely +``` + +**Step 3 - Verify messages in Redis:** +```bash +redis-cli +> XLEN stream:optimizely-sync-YOUR_SDK_KEY +(integer) 20 # 5 users × 4 flags + +> XRANGE stream:optimizely-sync-YOUR_SDK_KEY - + COUNT 5 +# Shows pending messages +``` + +**Step 4 - Restart Agent:** +```bash +./bin/optimizely +``` + +**Expected:** All messages automatically redelivered to SSE clients. + +### Redis CLI Inspection Commands + +```bash +# List all streams +KEYS stream:* + +# Check stream length +XLEN stream:optimizely-sync-{SDK_KEY} + +# View messages in stream +XRANGE stream:optimizely-sync-{SDK_KEY} - + COUNT 10 + +# View consumer group info +XINFO GROUPS stream:optimizely-sync-{SDK_KEY} + +# View pending messages (unacknowledged) +XPENDING stream:optimizely-sync-{SDK_KEY} notifications + +# View consumer info +XINFO CONSUMERS stream:optimizely-sync-{SDK_KEY} notifications + +# Clear stream (for testing) +DEL stream:optimizely-sync-{SDK_KEY} +``` + +## Migration Guide + +### Upgrading Redis Version (4.x → 5.x+) + +When you upgrade your Redis server from version 4.x to 5.x or higher, Agent will **automatically** start using Redis Streams on the next restart—no configuration changes needed. + +**1. Upgrade Redis:** + +```bash +# Example: Docker upgrade from Redis 4.x to 6.x +docker stop my-redis +docker run -d --name my-redis -p 6379:6379 redis:6.2 +``` + +**2. Restart Agent:** + +Agent will detect the new Redis version and automatically use Streams: + +``` +INFO Auto-detecting Redis version to choose best notification implementation... +INFO Redis Streams supported - will use Streams for notifications redis_version=6 +``` + +**3. (Optional) Add performance tuning:** + +If you want to customize batch size or flush interval for high-traffic scenarios: + +```yaml +synchronization: + pubsub: + redis: + batch_size: 50 # Larger batches for high traffic + flush_interval: 10s # Longer interval for efficiency +``` + +**4. Verify operation:** + +```bash +# Check streams are created +redis-cli KEYS "stream:*" + +# Monitor activity +redis-cli monitor | grep -E "xadd|xreadgroup|xack" +``` + +**5. Clean up old pub/sub channels (optional):** + +```bash +# List old channels from previous Pub/Sub usage +redis-cli PUBSUB CHANNELS "optimizely-sync-*" + +# They will expire naturally when no longer used +``` + +## Troubleshooting + +### Messages Not Delivered + +**Check 1 - Verify stream exists:** +```bash +redis-cli KEYS "stream:optimizely-sync-*" +``` + +**Check 2 - Check consumer group:** +```bash +redis-cli XINFO GROUPS stream:optimizely-sync-{SDK_KEY} +``` + +Expected output: +``` +1) "name" +2) "notifications" +3) "consumers" +4) (integer) 1 +5) "pending" +6) (integer) 0 +``` + +**Check 3 - Check for pending messages:** +```bash +redis-cli XPENDING stream:optimizely-sync-{SDK_KEY} notifications +``` + +If `pending > 0`, messages are stuck. Agent may have crashed before ACK. + +**Solution:** Restart Agent to reprocess pending messages. + +### High Memory Usage + +**Cause:** Streams not being trimmed. + +**Check stream length:** +```bash +redis-cli XLEN stream:optimizely-sync-{SDK_KEY} +``` + +**Solution 1 - Configure max length (future enhancement):** +```yaml +# Not currently implemented +max_len: 1000 # Keep only last 1000 messages +``` + +**Solution 2 - Manual cleanup:** +```bash +# Keep only last 100 messages +redis-cli XTRIM stream:optimizely-sync-{SDK_KEY} MAXLEN ~ 100 +``` + +### Connection Errors + +**Error:** `connection refused` or `timeout` + +**Check Redis availability:** +```bash +redis-cli ping +``` + +**Verify configuration:** +```yaml +synchronization: + pubsub: + redis: + host: "localhost:6379" # Correct host? + connection_timeout: 10s # Increase if needed +``` + +**Check Agent logs:** +```bash +# Look for connection errors +grep -i "redis" agent.log +``` + +### Performance Issues + +**Symptom:** High latency for notifications + +**Solution 1 - Reduce batch size:** +```yaml +batch_size: 5 # Smaller batches +flush_interval: 500ms # Faster flush +``` + +**Solution 2 - Check Redis performance:** +```bash +redis-cli --latency +redis-cli --stat +``` + +**Solution 3 - Monitor batch metrics:** +```bash +curl http://localhost:8088/metrics | grep redis_streams +``` + +## Advanced Topics + +### Consumer Groups & Load Balancing + +Redis Streams uses consumer groups to distribute messages across multiple Agent instances: + +- **Stream name:** `stream:optimizely-sync-{SDK_KEY}` +- **Consumer group:** `notifications` (default) +- **Consumer name:** `consumer-{timestamp}` (unique per Agent instance) + +**How it works:** + +``` +Stream → Consumer Group "notifications" → Agent 1 (consumer-123) reads msg 1, 2, 3 + → Agent 2 (consumer-456) reads msg 4, 5, 6 + → Agent 3 (consumer-789) reads msg 7, 8, 9 +``` + +Multiple Agents reading from same stream will **load-balance messages automatically**. + +### Multiple SDK Keys Support + +Subscribe to notifications for **multiple SDK keys** using wildcards: + +**Single SDK key:** +```bash +curl -N 'http://localhost:8080/v1/notifications/event-stream' \ + -H 'X-Optimizely-SDK-Key: ABC123' +``` + +**All SDK keys (Redis pattern subscribe):** +```bash +# Agent publishes to: stream:optimizely-sync-{sdk_key} +# Subscribe with pattern: stream:optimizely-sync-* + +redis-cli PSUBSCRIBE "stream:optimizely-sync-*" +``` + +### Message Claiming & Fault Tolerance + +If an Agent crashes before acknowledging a message, **another Agent can claim it**: + +**Step 1 - Agent 1 reads message:** +```bash +XREADGROUP GROUP notifications consumer1 STREAMS stream:name ">" +``` + +**Step 2 - Agent 1 crashes (message pending, not acknowledged)** + +**Step 3 - Check pending messages:** +```bash +XPENDING stream:name notifications +# Shows message owned by consumer1 (dead) +``` + +**Step 4 - Agent 2 claims abandoned message:** +```bash +XCLAIM stream:name notifications consumer2 60000 +# Claims messages pending > 60 seconds +``` + +**Step 5 - Agent 2 processes and acknowledges:** +```bash +XACK stream:name notifications +``` + +**Benefits:** +- **Load balancing:** Multiple workers process different messages +- **Fault tolerance:** Dead workers' messages claimed by others +- **Exactly-once delivery:** Messages stay pending until acknowledged + +### Message Format + +Messages stored in streams contain: + +```json +{ + "data": "{\"type\":\"decision\",\"message\":{...}}", + "timestamp": 1759461274 +} +``` + +- `data`: JSON-encoded notification payload +- `timestamp`: Unix timestamp of message creation + +### Retry Logic + +Failed operations use exponential backoff: + +1. Initial delay: `retry_delay` (default: 100ms) +2. Each retry: delay × 2 +3. Max delay: `max_retry_delay` (default: 5s) +4. Max retries: `max_retries` (default: 3) + +**Retryable errors:** +- Connection errors (refused, reset, timeout) +- Redis LOADING, READONLY, CLUSTERDOWN states + +**Non-retryable errors:** +- Authentication errors +- Invalid commands +- Memory limit exceeded + +## FAQ + +### Does Agent support TLS/HTTPS? + +Yes, TLS is configurable in `config.yaml`: + +```yaml +server: + keyFile: /path/to/key.pem # TLS private key + certFile: /path/to/cert.pem # TLS certificate +``` + +Uncomment and set paths to enable HTTPS for the Agent server. + +### Can I subscribe to multiple SDK keys? + +Yes, use Redis pattern subscribe: + +```bash +# Subscribe to all SDK keys +redis-cli PSUBSCRIBE "stream:optimizely-sync-*" +``` + +Agent publishes to channels: `stream:optimizely-sync-{sdk_key}` + +### Are large messages a problem? + +**Redis Streams:** Can handle up to **512MB** messages (Redis max string size) + +**SQS comparison:** Only **256KB** limit + +**Considerations:** +- Redis memory usage increases with message size +- Network bandwidth for large payloads +- Serialization/deserialization overhead + +For production, keep notifications < 1MB for optimal performance. + +### How do I avoid "password" security scanner alerts? + +Use alternative field names in `config.yaml`: + +```yaml +auth_token: "your-redis-password" # Preferred +# or +redis_secret: "your-redis-password" +# or +# export REDIS_PASSWORD="your-redis-password" # Environment variable +``` + +Avoid using `password:` field name which triggers security scanners. + +### Why use Redis instead of direct event dispatching? + +**Event dispatching** (SDK → Optimizely): +- Each Agent sends events independently ✓ + +**Redis notifications** (Agent ↔ Agent): +- Syncs datafile updates across **all Agent instances** +- Solves the load balancer problem (webhook → random Agent) +- Ensures all Agents serve consistent data + +See [Why Redis for Notifications?](#why-redis-for-notifications) for details. + +### Can multiple consumers read the same message? + +**Consumer groups:** No - messages distributed across consumers (load balancing) + +``` +Msg 1 → Consumer A +Msg 2 → Consumer B (different message) +Msg 3 → Consumer A +``` + +**Multiple consumer groups:** Yes - different groups get same messages + +``` +Group "notifications" → Consumer A gets Msg 1 +Group "analytics" → Consumer X gets Msg 1 (same message) +``` + +### What happens if a consumer crashes? + +Messages become **pending** (unacknowledged). Another consumer can **claim** them: + +```bash +# Check pending messages +XPENDING stream:name notifications + +# Claim abandoned messages (60s timeout) +XCLAIM stream:name notifications consumer2 60000 + +# Process and acknowledge +XACK stream:name notifications +``` + +This ensures **no message loss** even when Agents crash. + +## See Also + +- [Redis Streams Documentation](https://redis.io/docs/latest/develop/data-types/streams/) diff --git a/go.mod b/go.mod index ed98e3e4..27010b1b 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/go-chi/render v1.0.2 github.com/go-kit/kit v0.12.0 github.com/go-redis/redis/v8 v8.11.5 + github.com/go-redis/redismock/v8 v8.11.5 github.com/golang-jwt/jwt/v4 v4.5.2 github.com/google/uuid v1.3.1 github.com/lestrrat-go/jwx/v2 v2.0.20 diff --git a/go.sum b/go.sum index 6fd3b18e..c5e62988 100644 --- a/go.sum +++ b/go.sum @@ -47,6 +47,7 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= @@ -72,6 +73,8 @@ github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.m github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE= github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= github.com/go-chi/chi v1.5.4 h1:QHdzF2szwjqVV4wmByUnTcsbIg7UGaQ0tPF2t5GcAIs= @@ -97,6 +100,9 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-redis/redis/v8 v8.11.5 h1:AcZZR7igkdvfVmQTPnu9WE37LRrO/YrBH5zWyjDC0oI= github.com/go-redis/redis/v8 v8.11.5/go.mod h1:gREzHqY1hg6oD9ngVRbLStwAWKhA0FEgq8Jd4h5lpwo= +github.com/go-redis/redismock/v8 v8.11.5 h1:RJFIiua58hrBrSpXhnGX3on79AU3S271H4ZhRI1wyVo= +github.com/go-redis/redismock/v8 v8.11.5/go.mod h1:UaAU9dEe1C+eGr+FHV5prCWIt0hafyPWbGMEWE0UWdA= +github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= @@ -130,6 +136,7 @@ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QD github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= @@ -160,6 +167,7 @@ github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20201218002935-b9804c9f04c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= @@ -178,6 +186,7 @@ github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -223,10 +232,18 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= +github.com/onsi/ginkgo/v2 v2.0.0/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= github.com/onsi/gomega v1.18.1 h1:M1GfJqGRrBrrGGsbxzV5dqM2U2ApXefZCQpkukxYRLE= github.com/onsi/gomega v1.18.1/go.mod h1:0q+aL8jAiMXy9hbwj2mr5GziHiwhAIQpFmmtT5hitRs= github.com/optimizely/go-sdk/v2 v2.1.1-0.20250930190916-92b83d299b7a h1:wB445WJVx9JLYsHFQiy2OruPJlZ9ejae8vfsRHKZAtQ= @@ -378,6 +395,7 @@ golang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ= golang.org/x/mod v0.27.0/go.mod h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -398,6 +416,7 @@ golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/ golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= @@ -407,6 +426,7 @@ golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwY golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= @@ -448,6 +468,7 @@ golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -456,7 +477,10 @@ golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -477,12 +501,15 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210225134936-a50acf3fe073/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -519,6 +546,7 @@ golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= @@ -578,6 +606,7 @@ golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82u golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= @@ -698,11 +727,14 @@ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/syncer/pubsub.go b/pkg/syncer/pubsub.go index 6436c03e..e2203b62 100644 --- a/pkg/syncer/pubsub.go +++ b/pkg/syncer/pubsub.go @@ -20,23 +20,29 @@ package syncer import ( "context" "errors" + "time" + "github.com/go-redis/redis/v8" "github.com/optimizely/agent/config" "github.com/optimizely/agent/pkg/syncer/pubsub" + "github.com/optimizely/agent/pkg/utils/redisauth" + "github.com/rs/zerolog/log" ) const ( // PubSubDefaultChan will be used as default pubsub channel name PubSubDefaultChan = "optimizely-sync" - // PubSubRedis is the name of pubsub type of Redis + // PubSubRedis is the name of pubsub type of Redis (fire-and-forget) PubSubRedis = "redis" + // PubSubRedisStreams is the name of pubsub type of Redis Streams (persistent) + PubSubRedisStreams = "redis-streams" ) -type SycnFeatureFlag string +type SyncFeatureFlag string const ( - SyncFeatureFlagNotificaiton SycnFeatureFlag = "sync-feature-flag-notification" - SycnFeatureFlagDatafile SycnFeatureFlag = "sync-feature-flag-datafile" + SyncFeatureFlagNotification SyncFeatureFlag = "sync-feature-flag-notification" + SyncFeatureFlagDatafile SyncFeatureFlag = "sync-feature-flag-datafile" ) type PubSub interface { @@ -44,21 +50,24 @@ type PubSub interface { Subscribe(ctx context.Context, channel string) (chan string, error) } -func newPubSub(conf config.SyncConfig, featureFlag SycnFeatureFlag) (PubSub, error) { - if featureFlag == SyncFeatureFlagNotificaiton { - if conf.Notification.Default == PubSubRedis { - return getPubSubRedis(conf) - } else { - return nil, errors.New("pubsub type not supported") - } - } else if featureFlag == SycnFeatureFlagDatafile { - if conf.Datafile.Default == PubSubRedis { - return getPubSubRedis(conf) - } else { - return nil, errors.New("pubsub type not supported") - } +func newPubSub(conf config.SyncConfig, featureFlag SyncFeatureFlag) (PubSub, error) { + var defaultPubSub string + + if featureFlag == SyncFeatureFlagNotification { + defaultPubSub = conf.Notification.Default + } else if featureFlag == SyncFeatureFlagDatafile { + defaultPubSub = conf.Datafile.Default + } else { + return nil, errors.New("provided feature flag not supported") + } + + // Only support auto-detection + if defaultPubSub == PubSubRedis { + // Use auto-detection (with fallback to Pub/Sub if detection fails) + return getPubSubWithAutoDetect(conf) } - return nil, errors.New("provided feature flag not supported") + + return nil, errors.New("pubsub type not supported") } func getPubSubRedis(conf config.SyncConfig) (PubSub, error) { @@ -81,27 +90,168 @@ func getPubSubRedis(conf config.SyncConfig) (PubSub, error) { return nil, errors.New("pubsub redis host not valid, host must be string") } - passwordVal, found := redisConf["password"] + // Support multiple auth field names and env var fallback for security scanning compliance + password := redisauth.GetPassword(redisConf, "REDIS_PASSWORD") + + databaseVal, found := redisConf["database"] + if !found { + return nil, errors.New("pubsub redis database not found") + } + // YAML/JSON unmarshals numbers as float64, convert to int + var database int + switch v := databaseVal.(type) { + case int: + database = v + case float64: + database = int(v) + default: + return nil, errors.New("pubsub redis database not valid, database must be numeric") + } + + // Return original Redis pub/sub implementation (fire-and-forget) + return &pubsub.Redis{ + Host: host, + Password: password, + Database: database, + }, nil +} + +func getPubSubRedisStreams(conf config.SyncConfig) (PubSub, error) { + pubsubConf, found := conf.Pubsub[PubSubRedis] + if !found { + return nil, errors.New("pubsub redis config not found") + } + + redisConf, ok := pubsubConf.(map[string]interface{}) + if !ok { + return nil, errors.New("pubsub redis config not valid") + } + + hostVal, found := redisConf["host"] if !found { - return nil, errors.New("pubsub redis password not found") + return nil, errors.New("pubsub redis host not found") } - password, ok := passwordVal.(string) + host, ok := hostVal.(string) if !ok { - return nil, errors.New("pubsub redis password not valid, password must be string") + return nil, errors.New("pubsub redis host not valid, host must be string") } + // Support multiple auth field names and env var fallback for security scanning compliance + password := redisauth.GetPassword(redisConf, "REDIS_PASSWORD") + databaseVal, found := redisConf["database"] if !found { return nil, errors.New("pubsub redis database not found") } - database, ok := databaseVal.(int) + // YAML/JSON unmarshals numbers as float64, convert to int + var database int + switch v := databaseVal.(type) { + case int: + database = v + case float64: + database = int(v) + default: + return nil, errors.New("pubsub redis database not valid, database must be numeric") + } + + // Parse optional Redis Streams configuration parameters + batchSize := getIntFromConfig(redisConf, "batch_size", 10) + flushInterval := getDurationFromConfig(redisConf, "flush_interval", 5*time.Second) + maxRetries := getIntFromConfig(redisConf, "max_retries", 3) + retryDelay := getDurationFromConfig(redisConf, "retry_delay", 100*time.Millisecond) + maxRetryDelay := getDurationFromConfig(redisConf, "max_retry_delay", 5*time.Second) + connTimeout := getDurationFromConfig(redisConf, "connection_timeout", 10*time.Second) + + // Return Redis Streams implementation with configuration + return &pubsub.RedisStreams{ + Host: host, + Password: password, + Database: database, + BatchSize: batchSize, + FlushInterval: flushInterval, + MaxRetries: maxRetries, + RetryDelay: retryDelay, + MaxRetryDelay: maxRetryDelay, + ConnTimeout: connTimeout, + }, nil +} + +// getIntFromConfig safely extracts an integer value from config map with default fallback +func getIntFromConfig(config map[string]interface{}, key string, defaultValue int) int { + if val, found := config[key]; found { + if intVal, ok := val.(int); ok { + return intVal + } + } + return defaultValue +} + +// getDurationFromConfig safely extracts a duration value from config map with default fallback +func getDurationFromConfig(config map[string]interface{}, key string, defaultValue time.Duration) time.Duration { + if val, found := config[key]; found { + if strVal, ok := val.(string); ok { + if duration, err := time.ParseDuration(strVal); err == nil { + return duration + } + } + } + return defaultValue +} + +// getPubSubWithAutoDetect creates a PubSub instance using Redis version auto-detection +// Falls back to Pub/Sub (safe default) if detection fails for any reason +func getPubSubWithAutoDetect(conf config.SyncConfig) (PubSub, error) { + pubsubConf, found := conf.Pubsub[PubSubRedis] + if !found { + return nil, errors.New("pubsub redis config not found") + } + + redisConf, ok := pubsubConf.(map[string]interface{}) if !ok { - return nil, errors.New("pubsub redis database not valid, database must be int") + return nil, errors.New("pubsub redis config not valid") } - return &pubsub.Redis{ - Host: host, + // Get connection details + hostVal, found := redisConf["host"] + if !found { + return nil, errors.New("pubsub redis host not found") + } + host, ok := hostVal.(string) + if !ok { + return nil, errors.New("pubsub redis host not valid, host must be string") + } + + password := redisauth.GetPassword(redisConf, "REDIS_PASSWORD") + + databaseVal, found := redisConf["database"] + if !found { + return nil, errors.New("pubsub redis database not found") + } + var database int + switch v := databaseVal.(type) { + case int: + database = v + case float64: + database = int(v) + default: + return nil, errors.New("pubsub redis database not valid, database must be numeric") + } + + // Create temporary Redis client for version detection + client := redis.NewClient(&redis.Options{ + Addr: host, Password: password, - Database: database, - }, nil + DB: database, + }) + defer client.Close() + + // Attempt version detection + log.Info().Msg("Auto-detecting Redis version to choose best notification implementation...") + if pubsub.SupportsRedisStreams(client) { + // Redis >= 5.0 - Use Streams + return getPubSubRedisStreams(conf) + } + + // Redis < 5.0 or detection failed - Use Pub/Sub (safe default) + return getPubSubRedis(conf) } diff --git a/pkg/syncer/pubsub/redis_streams.go b/pkg/syncer/pubsub/redis_streams.go new file mode 100644 index 00000000..1fe098da --- /dev/null +++ b/pkg/syncer/pubsub/redis_streams.go @@ -0,0 +1,565 @@ +/**************************************************************************** + * Copyright 2025 Optimizely, Inc. and contributors * + * * + * Licensed under the Apache License, Version 2.0 (the "License"); * + * you may not use this file except in compliance with the License. * + * You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, software * + * distributed under the License is distributed on an "AS IS" BASIS, * + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * + * See the License for the specific language governing permissions and * + * limitations under the License. * + ***************************************************************************/ + +// Package pubsub provides pubsub functionality for the agent syncer +package pubsub + +import ( + "context" + "encoding/json" + "fmt" + "math" + "os" + "strings" + "time" + + "github.com/go-redis/redis/v8" + "github.com/rs/zerolog/log" + + "github.com/optimizely/agent/pkg/metrics" +) + +// RedisStreams implements persistent message delivery using Redis Streams +type RedisStreams struct { + Host string + Password string + Database int + // Stream configuration + MaxLen int64 + ConsumerGroup string + ConsumerName string + // Batching configuration + BatchSize int + FlushInterval time.Duration + // Retry configuration + MaxRetries int + RetryDelay time.Duration + MaxRetryDelay time.Duration + // Connection timeout + ConnTimeout time.Duration + // Metrics registry + metricsRegistry *metrics.Registry +} + +func (r *RedisStreams) Publish(ctx context.Context, channel string, message interface{}) error { + streamName := r.getStreamName(channel) + + // Convert message to string for consistent handling + var messageStr string + switch v := message.(type) { + case []byte: + messageStr = string(v) + case string: + messageStr = v + default: + // For other types, marshal to JSON + jsonBytes, err := json.Marshal(v) + if err != nil { + return fmt.Errorf("failed to marshal message: %w", err) + } + messageStr = string(jsonBytes) + } + + // Add message to stream with automatic ID generation + args := &redis.XAddArgs{ + Stream: streamName, + Values: map[string]interface{}{ + "data": messageStr, + "timestamp": time.Now().Unix(), + }, + } + + // Apply max length trimming if configured + if r.MaxLen > 0 { + args.MaxLen = r.MaxLen + args.Approx = true // Use approximate trimming for better performance + } + + return r.executeWithRetry(ctx, func(client *redis.Client) error { + return client.XAdd(ctx, args).Err() + }) +} + +func (r *RedisStreams) Subscribe(ctx context.Context, channel string) (chan string, error) { + streamName := r.getStreamName(channel) + consumerGroup := r.getConsumerGroup() + consumerName := r.getConsumerName() + + ch := make(chan string) + ready := make(chan error, 1) // Signal when consumer group is ready + stop := make(chan struct{}) // Signal to stop goroutine + + go func() { + defer close(ch) + defer close(ready) // Ensure ready is closed + + batchSize := r.getBatchSize() + flushTicker := time.NewTicker(r.getFlushInterval()) + defer flushTicker.Stop() + + var batch []string + var client *redis.Client + var lastReconnect time.Time + reconnectDelay := 1 * time.Second + maxReconnectDelay := 30 * time.Second + + // Initialize connection + client = r.createClient() + defer client.Close() + + // Create consumer group with retry + if err := r.createConsumerGroupWithRetry(ctx, client, streamName, consumerGroup); err != nil { + log.Error().Err(err).Str("stream", streamName).Str("group", consumerGroup).Msg("Failed to create consumer group") + select { + case ready <- err: // Signal initialization failure + case <-stop: // Main function signaled stop + } + return + } + + // Signal that consumer group is ready + select { + case ready <- nil: + case <-stop: // Main function signaled stop + return + } + + for { + select { + case <-stop: + // Main function requested stop + return + case <-ctx.Done(): + // Send any remaining batch before closing + if len(batch) > 0 { + r.sendBatch(ch, batch, ctx) + } + return + case <-flushTicker.C: + // Flush interval reached - send current batch + if len(batch) > 0 { + r.incrementCounter("batch.flush_interval") + r.sendBatch(ch, batch, ctx) + batch = nil + } + default: + // Read messages from the stream using consumer group + streams, err := client.XReadGroup(ctx, &redis.XReadGroupArgs{ + Group: consumerGroup, + Consumer: consumerName, + Streams: []string{streamName, ">"}, + Count: int64(batchSize - len(batch)), // Read up to remaining batch size + Block: 100 * time.Millisecond, // Short block to allow flush checking + }).Result() + + if err != nil { + if err == redis.Nil { + continue // No messages, continue polling + } + + // Handle connection errors with exponential backoff reconnection + if r.isConnectionError(err) { + r.incrementCounter("connection.error") + log.Warn().Err(err).Msg("Redis connection error, attempting reconnection") + + // Apply exponential backoff for reconnection + if time.Since(lastReconnect) > reconnectDelay { + r.incrementCounter("connection.reconnect_attempt") + client.Close() + client = r.createClient() + lastReconnect = time.Now() + + // Recreate consumer group after reconnection + if groupErr := r.createConsumerGroupWithRetry(ctx, client, streamName, consumerGroup); groupErr != nil { + r.incrementCounter("connection.group_recreate_error") + log.Error().Err(groupErr).Msg("Failed to recreate consumer group after reconnection") + } else { + r.incrementCounter("connection.reconnect_success") + } + + // Increase reconnect delay with exponential backoff + reconnectDelay = time.Duration(math.Min(float64(reconnectDelay*2), float64(maxReconnectDelay))) + } else { + // Wait before next retry + time.Sleep(100 * time.Millisecond) + } + } else { + // Log other errors but continue processing + r.incrementCounter("read.error") + log.Debug().Err(err).Msg("Redis streams read error") + } + continue + } + + // Reset reconnect delay on successful read + reconnectDelay = 1 * time.Second + + // Process messages from streams + messageCount := 0 + for _, stream := range streams { + for _, message := range stream.Messages { + // Extract the data field + if data, ok := message.Values["data"].(string); ok { + batch = append(batch, data) + messageCount++ + + // Acknowledge the message with retry + if ackErr := r.acknowledgeMessage(ctx, client, streamName, consumerGroup, message.ID); ackErr != nil { + log.Warn().Err(ackErr).Str("messageID", message.ID).Msg("Failed to acknowledge message") + } + + // Send batch if it's full + if len(batch) >= batchSize { + r.incrementCounter("batch.sent") + r.sendBatch(ch, batch, ctx) + batch = nil + // Continue processing more messages + } + } + } + } + + // Track successful message reads + if messageCount > 0 { + r.incrementCounter("messages.read") + } + } + } + }() + + // Wait for consumer group initialization before returning + select { + case err := <-ready: + if err != nil { + close(stop) // Signal goroutine to stop on initialization error + return nil, err + } + // Success - goroutine continues running + return ch, nil + case <-ctx.Done(): + close(stop) // Signal goroutine to stop on context cancellation + return nil, ctx.Err() + } +} + +// Helper method to send batch to channel +func (r *RedisStreams) sendBatch(ch chan string, batch []string, ctx context.Context) { + for _, msg := range batch { + select { + case ch <- msg: + // Message sent successfully + case <-ctx.Done(): + return + } + } +} + +// Helper methods +func (r *RedisStreams) getStreamName(channel string) string { + return fmt.Sprintf("stream:%s", channel) +} + +func (r *RedisStreams) getConsumerGroup() string { + if r.ConsumerGroup == "" { + return "notifications" + } + return r.ConsumerGroup +} + +func (r *RedisStreams) getConsumerName() string { + if r.ConsumerName == "" { + hostname, _ := os.Hostname() + pid := os.Getpid() + return fmt.Sprintf("consumer-%s-%d-%d", hostname, pid, time.Now().Unix()) + } + return r.ConsumerName +} + +func (r *RedisStreams) getBatchSize() int { + if r.BatchSize <= 0 { + return 10 // Default batch size + } + return r.BatchSize +} + +func (r *RedisStreams) getFlushInterval() time.Duration { + if r.FlushInterval <= 0 { + return 5 * time.Second // Default flush interval + } + return r.FlushInterval +} + +func (r *RedisStreams) getMaxRetries() int { + if r.MaxRetries <= 0 { + return 3 // Default max retries + } + return r.MaxRetries +} + +func (r *RedisStreams) getRetryDelay() time.Duration { + if r.RetryDelay <= 0 { + return 100 * time.Millisecond // Default retry delay + } + return r.RetryDelay +} + +func (r *RedisStreams) getMaxRetryDelay() time.Duration { + if r.MaxRetryDelay <= 0 { + return 5 * time.Second // Default max retry delay + } + return r.MaxRetryDelay +} + +func (r *RedisStreams) getConnTimeout() time.Duration { + if r.ConnTimeout <= 0 { + return 10 * time.Second // Default connection timeout + } + return r.ConnTimeout +} + +// createClient creates a new Redis client with configured timeouts +func (r *RedisStreams) createClient() *redis.Client { + return redis.NewClient(&redis.Options{ + Addr: r.Host, + Password: r.Password, + DB: r.Database, + DialTimeout: r.getConnTimeout(), + ReadTimeout: r.getConnTimeout(), + WriteTimeout: r.getConnTimeout(), + PoolTimeout: r.getConnTimeout(), + }) +} + +// executeWithRetry executes a Redis operation with retry logic +func (r *RedisStreams) executeWithRetry(ctx context.Context, operation func(client *redis.Client) error) error { + start := time.Now() + maxRetries := r.getMaxRetries() + retryDelay := r.getRetryDelay() + maxRetryDelay := r.getMaxRetryDelay() + + var lastErr error + for attempt := 0; attempt <= maxRetries; attempt++ { + client := r.createClient() + var err error + func() { + defer client.Close() // Always executes, even on panic + err = operation(client) + }() + + if err == nil { + // Record successful operation metrics + r.incrementCounter("operations.success") + r.recordTimer("operations.duration", time.Since(start).Seconds()) + if attempt > 0 { + r.incrementCounter("retries.success") + } + return nil // Success + } + + lastErr = err + r.incrementCounter("operations.error") + + // Don't retry on non-recoverable errors + if !r.isRetryableError(err) { + r.incrementCounter("errors.non_retryable") + return fmt.Errorf("non-retryable error: %w", err) + } + + // Don't sleep after the last attempt + if attempt < maxRetries { + r.incrementCounter("retries.attempt") + // Calculate delay with exponential backoff + delay := time.Duration(math.Min(float64(retryDelay)*math.Pow(2, float64(attempt)), float64(maxRetryDelay))) + + select { + case <-ctx.Done(): + r.incrementCounter("operations.canceled") + return ctx.Err() + case <-time.After(delay): + // Continue to next retry + } + } + } + + r.incrementCounter("retries.exhausted") + return fmt.Errorf("operation failed after %d retries: %w", maxRetries, lastErr) +} + +// createConsumerGroupWithRetry creates a consumer group with retry logic +func (r *RedisStreams) createConsumerGroupWithRetry(ctx context.Context, client *redis.Client, streamName, consumerGroup string) error { + maxRetries := r.getMaxRetries() + retryDelay := r.getRetryDelay() + maxRetryDelay := r.getMaxRetryDelay() + + var lastErr error + for attempt := 0; attempt <= maxRetries; attempt++ { + _, err := client.XGroupCreateMkStream(ctx, streamName, consumerGroup, "$").Result() + if err == nil || err.Error() == "BUSYGROUP Consumer Group name already exists" { + return nil // Success + } + + lastErr = err + + // Don't retry on non-recoverable errors + if !r.isRetryableError(err) { + return fmt.Errorf("non-retryable error creating consumer group: %w", err) + } + + // Don't sleep after the last attempt + if attempt < maxRetries { + // Calculate delay with exponential backoff + delay := time.Duration(math.Min(float64(retryDelay)*math.Pow(2, float64(attempt)), float64(maxRetryDelay))) + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(delay): + // Continue to next retry + } + } + } + + return fmt.Errorf("failed to create consumer group after %d retries: %w", maxRetries, lastErr) +} + +// acknowledgeMessage acknowledges a message with retry logic +func (r *RedisStreams) acknowledgeMessage(ctx context.Context, client *redis.Client, streamName, consumerGroup, messageID string) error { + maxRetries := 2 // Fewer retries for ACK operations + retryDelay := 50 * time.Millisecond + + var lastErr error + for attempt := 0; attempt <= maxRetries; attempt++ { + err := client.XAck(ctx, streamName, consumerGroup, messageID).Err() + if err == nil { + r.incrementCounter("ack.success") + if attempt > 0 { + r.incrementCounter("ack.retry_success") + } + return nil // Success + } + + lastErr = err + r.incrementCounter("ack.error") + + // Don't retry on non-recoverable errors + if !r.isRetryableError(err) { + r.incrementCounter("ack.non_retryable_error") + return fmt.Errorf("non-retryable ACK error: %w", err) + } + + // Don't sleep after the last attempt + if attempt < maxRetries { + r.incrementCounter("ack.retry_attempt") + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(retryDelay): + // Continue to next retry + } + } + } + + r.incrementCounter("ack.retry_exhausted") + return fmt.Errorf("ACK failed after %d retries: %w", maxRetries, lastErr) +} + +// isRetryableError determines if an error is retryable +func (r *RedisStreams) isRetryableError(err error) bool { + if err == nil { + return false + } + + errStr := err.Error() + + // Network/connection errors that are retryable + retryableErrors := []string{ + "connection refused", + "connection reset", + "timeout", + "network is unreachable", + "broken pipe", + "eof", + "i/o timeout", + "connection pool exhausted", + "context deadline exceeded", + "context canceled", // Handle graceful shutdowns + "no such host", // DNS lookup failures + } + + for _, retryable := range retryableErrors { + if strings.Contains(strings.ToLower(errStr), retryable) { + return true + } + } + + // Redis-specific retryable errors + if strings.Contains(errStr, "LOADING") || // Redis is loading data + strings.Contains(errStr, "READONLY") || // Redis is in read-only mode + strings.Contains(errStr, "CLUSTERDOWN") { // Redis cluster is down + return true + } + + return false +} + +// isConnectionError determines if an error is a connection error +func (r *RedisStreams) isConnectionError(err error) bool { + if err == nil { + return false + } + + errStr := err.Error() + + connectionErrors := []string{ + "connection refused", + "connection reset", + "network is unreachable", + "broken pipe", + "eof", + "connection pool exhausted", + } + + for _, connErr := range connectionErrors { + if strings.Contains(strings.ToLower(errStr), connErr) { + return true + } + } + + return false +} + +// SetMetricsRegistry sets the metrics registry for tracking statistics +func (r *RedisStreams) SetMetricsRegistry(registry *metrics.Registry) { + r.metricsRegistry = registry +} + +// incrementCounter safely increments a metrics counter if registry is available +func (r *RedisStreams) incrementCounter(key string) { + if r.metricsRegistry != nil { + if counter := r.metricsRegistry.GetCounter("redis_streams." + key); counter != nil { + counter.Add(1) + } + } +} + +// recordTimer safely records a timer metric if registry is available +func (r *RedisStreams) recordTimer(key string, duration float64) { + if r.metricsRegistry != nil { + if timer := r.metricsRegistry.NewTimer("redis_streams." + key); timer != nil { + timer.Update(duration) + } + } +} diff --git a/pkg/syncer/pubsub/redis_streams_error_test.go b/pkg/syncer/pubsub/redis_streams_error_test.go new file mode 100644 index 00000000..688c065b --- /dev/null +++ b/pkg/syncer/pubsub/redis_streams_error_test.go @@ -0,0 +1,481 @@ +/**************************************************************************** + * Copyright 2025 Optimizely, Inc. and contributors * + * * + * Licensed under the Apache License, Version 2.0 (the "License"); * + * you may not use this file except in compliance with the License. * + * You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, software * + * distributed under the License is distributed on an "AS IS" BASIS, * + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * + * See the License for the specific language governing permissions and * + * limitations under the License. * + ***************************************************************************/ + +package pubsub + +import ( + "context" + "errors" + "strings" + "testing" + "time" + + "github.com/go-redis/redis/v8" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/optimizely/agent/pkg/metrics" +) + +func setupRedisStreamsWithRetry() *RedisStreams { + return &RedisStreams{ + Host: "localhost:6379", + Password: "", + Database: 0, + MaxLen: 1000, + ConsumerGroup: "test-group", + ConsumerName: "test-consumer", + BatchSize: 10, + FlushInterval: 5 * time.Second, + MaxRetries: 3, + RetryDelay: 50 * time.Millisecond, + MaxRetryDelay: 1 * time.Second, + ConnTimeout: 5 * time.Second, + // Don't set metricsRegistry by default to avoid conflicts + metricsRegistry: nil, + } +} + +func TestRedisStreams_RetryConfiguration_Defaults(t *testing.T) { + rs := &RedisStreams{} + + assert.Equal(t, 3, rs.getMaxRetries()) + assert.Equal(t, 100*time.Millisecond, rs.getRetryDelay()) + assert.Equal(t, 5*time.Second, rs.getMaxRetryDelay()) + assert.Equal(t, 10*time.Second, rs.getConnTimeout()) +} + +func TestRedisStreams_RetryConfiguration_Custom(t *testing.T) { + rs := &RedisStreams{ + MaxRetries: 5, + RetryDelay: 200 * time.Millisecond, + MaxRetryDelay: 10 * time.Second, + ConnTimeout: 30 * time.Second, + } + + assert.Equal(t, 5, rs.getMaxRetries()) + assert.Equal(t, 200*time.Millisecond, rs.getRetryDelay()) + assert.Equal(t, 10*time.Second, rs.getMaxRetryDelay()) + assert.Equal(t, 30*time.Second, rs.getConnTimeout()) +} + +func TestRedisStreams_IsRetryableError(t *testing.T) { + rs := setupRedisStreamsWithRetry() + + testCases := []struct { + name string + err error + retryable bool + }{ + { + name: "nil error", + err: nil, + retryable: false, + }, + { + name: "connection refused", + err: errors.New("connection refused"), + retryable: true, + }, + { + name: "connection reset", + err: errors.New("connection reset by peer"), + retryable: true, + }, + { + name: "timeout error", + err: errors.New("i/o timeout"), + retryable: true, + }, + { + name: "network unreachable", + err: errors.New("network is unreachable"), + retryable: true, + }, + { + name: "broken pipe", + err: errors.New("broken pipe"), + retryable: true, + }, + { + name: "EOF error", + err: errors.New("EOF"), + retryable: true, + }, + { + name: "context deadline exceeded", + err: errors.New("context deadline exceeded"), + retryable: true, + }, + { + name: "context canceled", + err: errors.New("context canceled"), + retryable: true, + }, + { + name: "Redis LOADING", + err: errors.New("LOADING Redis is loading the dataset in memory"), + retryable: true, + }, + { + name: "Redis READONLY", + err: errors.New("READONLY You can't write against a read only replica."), + retryable: true, + }, + { + name: "Redis CLUSTERDOWN", + err: errors.New("CLUSTERDOWN Hash slot not served"), + retryable: true, + }, + { + name: "syntax error - not retryable", + err: errors.New("ERR syntax error"), + retryable: false, + }, + { + name: "wrong type error - not retryable", + err: errors.New("WRONGTYPE Operation against a key holding the wrong kind of value"), + retryable: false, + }, + { + name: "authentication error - not retryable", + err: errors.New("NOAUTH Authentication required"), + retryable: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := rs.isRetryableError(tc.err) + assert.Equal(t, tc.retryable, result, "Error: %v", tc.err) + }) + } +} + +func TestRedisStreams_IsConnectionError(t *testing.T) { + rs := setupRedisStreamsWithRetry() + + testCases := []struct { + name string + err error + isConnection bool + }{ + { + name: "nil error", + err: nil, + isConnection: false, + }, + { + name: "connection refused", + err: errors.New("connection refused"), + isConnection: true, + }, + { + name: "connection reset", + err: errors.New("connection reset by peer"), + isConnection: true, + }, + { + name: "network unreachable", + err: errors.New("network is unreachable"), + isConnection: true, + }, + { + name: "broken pipe", + err: errors.New("broken pipe"), + isConnection: true, + }, + { + name: "EOF error", + err: errors.New("EOF"), + isConnection: true, + }, + { + name: "syntax error - not connection", + err: errors.New("ERR syntax error"), + isConnection: false, + }, + { + name: "timeout - not connection", + err: errors.New("i/o timeout"), + isConnection: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := rs.isConnectionError(tc.err) + assert.Equal(t, tc.isConnection, result, "Error: %v", tc.err) + }) + } +} + +func TestRedisStreams_Publish_WithInvalidHost_ShouldRetry(t *testing.T) { + rs := setupRedisStreamsWithRetry() + rs.Host = "invalid-host:6379" // Use invalid host to trigger connection errors + rs.MaxRetries = 2 // Limit retries for faster test + rs.RetryDelay = 10 * time.Millisecond + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + err := rs.Publish(ctx, "test-channel", "test message") + + // Should fail with either retry exhaustion or non-retryable error (DNS lookup can fail differently in CI) + assert.Error(t, err) + errMsg := err.Error() + assert.True(t, + strings.Contains(errMsg, "operation failed after") || + strings.Contains(errMsg, "non-retryable error") || + strings.Contains(errMsg, "lookup invalid-host"), + "Expected retry or DNS error, got: %s", errMsg) +} + +func TestRedisStreams_Publish_WithCanceledContext(t *testing.T) { + rs := setupRedisStreamsWithRetry() + rs.Host = "invalid-host:6379" // Use invalid host to trigger retries + rs.MaxRetries = 5 + rs.RetryDelay = 100 * time.Millisecond + + ctx, cancel := context.WithCancel(context.Background()) + // Cancel context immediately to test cancellation handling + cancel() + + err := rs.Publish(ctx, "test-channel", "test message") + + // Should fail with context canceled error + assert.Error(t, err) + // Could be either context canceled directly or wrapped in retry error + assert.True(t, strings.Contains(err.Error(), "context canceled") || + strings.Contains(err.Error(), "operation failed after")) +} + +func TestRedisStreams_MetricsIntegration(t *testing.T) { + rs := setupRedisStreamsWithRetry() + + // Test that metrics registry can be set and retrieved + registry := metrics.NewRegistry("metrics_integration_test") + rs.SetMetricsRegistry(registry) + + assert.Equal(t, registry, rs.metricsRegistry) +} + +func TestRedisStreams_MetricsTracking_SafeWithNilRegistry(t *testing.T) { + rs := setupRedisStreamsWithRetry() + rs.metricsRegistry = nil + + // These should not panic with nil registry + rs.incrementCounter("test.counter") + rs.recordTimer("test.timer", 1.5) +} + +func TestRedisStreams_CreateClient_WithTimeouts(t *testing.T) { + rs := setupRedisStreamsWithRetry() + rs.ConnTimeout = 2 * time.Second + + client := rs.createClient() + defer client.Close() + + assert.NotNil(t, client) + // Note: go-redis client options are not easily inspectable, + // but we can verify the client was created without error +} + +func TestRedisStreams_AcknowledgeMessage_WithRetry(t *testing.T) { + // This test requires a running Redis instance + rs := setupRedisStreamsWithRetry() + ctx := context.Background() + + // Create a client to set up test data + client := redis.NewClient(&redis.Options{ + Addr: rs.Host, + Password: rs.Password, + DB: rs.Database, + }) + defer client.Close() + + streamName := "test-ack-stream" + consumerGroup := "test-ack-group" + + // Clean up + defer func() { + client.Del(ctx, streamName) + }() + + // Add a message to the stream + msgID, err := client.XAdd(ctx, &redis.XAddArgs{ + Stream: streamName, + Values: map[string]interface{}{ + "data": "test message", + }, + }).Result() + require.NoError(t, err) + + // Create consumer group + client.XGroupCreateMkStream(ctx, streamName, consumerGroup, "0") + + // Test acknowledge with valid message ID (should succeed) + err = rs.acknowledgeMessage(ctx, client, streamName, consumerGroup, msgID) + assert.NoError(t, err) + + // Test acknowledge with invalid message ID (should fail but not crash) + err = rs.acknowledgeMessage(ctx, client, streamName, consumerGroup, "invalid-id") + assert.Error(t, err) +} + +func TestRedisStreams_ExecuteWithRetry_NonRetryableError(t *testing.T) { + rs := setupRedisStreamsWithRetry() + ctx := context.Background() + + // Simulate a non-retryable error + operation := func(client *redis.Client) error { + return errors.New("ERR syntax error") // Non-retryable + } + + err := rs.executeWithRetry(ctx, operation) + + assert.Error(t, err) + assert.Contains(t, err.Error(), "non-retryable error") + assert.Contains(t, err.Error(), "ERR syntax error") +} + +func TestRedisStreams_ExecuteWithRetry_SuccessAfterRetries(t *testing.T) { + rs := setupRedisStreamsWithRetry() + rs.RetryDelay = 1 * time.Millisecond // Fast retries for testing + // Don't set metrics registry to avoid expvar name conflicts across tests + // (expvar counters are global and can't be reused even with unique registry names) + ctx := context.Background() + + attemptCount := 0 + operation := func(client *redis.Client) error { + attemptCount++ + if attemptCount < 3 { + return errors.New("connection refused") // Retryable + } + return nil // Success on third attempt + } + + err := rs.executeWithRetry(ctx, operation) + + assert.NoError(t, err) + assert.Equal(t, 3, attemptCount) +} + +func TestRedisStreams_ExecuteWithRetry_ExhaustRetries(t *testing.T) { + rs := setupRedisStreamsWithRetry() + rs.MaxRetries = 2 + rs.RetryDelay = 1 * time.Millisecond // Fast retries for testing + // Don't set metrics registry to avoid expvar name conflicts across tests + // (expvar counters are global and can't be reused even with unique registry names) + ctx := context.Background() + + attemptCount := 0 + operation := func(client *redis.Client) error { + attemptCount++ + return errors.New("connection refused") // Always retryable error + } + + err := rs.executeWithRetry(ctx, operation) + + assert.Error(t, err) + assert.Contains(t, err.Error(), "operation failed after 2 retries") + assert.Equal(t, 3, attemptCount) // 1 initial + 2 retries +} + +func TestRedisStreams_CreateConsumerGroupWithRetry_BusyGroupExists(t *testing.T) { + rs := setupRedisStreamsWithRetry() + ctx := context.Background() + + // Create a client to set up test data + client := redis.NewClient(&redis.Options{ + Addr: rs.Host, + Password: rs.Password, + DB: rs.Database, + }) + defer client.Close() + + streamName := "test-busy-group-stream" + consumerGroup := "test-busy-group" + + // Clean up + defer func() { + client.Del(ctx, streamName) + }() + + // First call should succeed + err := rs.createConsumerGroupWithRetry(ctx, client, streamName, consumerGroup) + assert.NoError(t, err) + + // Second call should also succeed (BUSYGROUP error is handled) + err = rs.createConsumerGroupWithRetry(ctx, client, streamName, consumerGroup) + assert.NoError(t, err) +} + +func TestRedisStreams_ErrorHandling_ContextCancellation(t *testing.T) { + rs := setupRedisStreamsWithRetry() + rs.RetryDelay = 100 * time.Millisecond + // Don't set metrics registry to avoid expvar name conflicts across tests + // (expvar counters are global and can't be reused even with unique registry names) + + ctx, cancel := context.WithCancel(context.Background()) + + go func() { + // Cancel context after a short delay + time.Sleep(50 * time.Millisecond) + cancel() + }() + + operation := func(client *redis.Client) error { + return errors.New("connection refused") // Retryable error + } + + err := rs.executeWithRetry(ctx, operation) + + assert.Error(t, err) + assert.Equal(t, context.Canceled, err) +} + +func TestRedisStreams_Subscribe_ErrorRecovery_Integration(t *testing.T) { + // Integration test - requires Redis to be running + rs := setupRedisStreamsWithRetry() + rs.MaxRetries = 1 // Limit retries for faster test + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + channel := "test-error-recovery" + defer cleanupRedisStream(rs.getStreamName(channel)) + + // Start subscriber + ch, err := rs.Subscribe(ctx, channel) + require.NoError(t, err) + + // Give some time for setup + time.Sleep(100 * time.Millisecond) + + // Publish a message + err = rs.Publish(ctx, channel, "test message") + require.NoError(t, err) + + // Should receive the message despite any internal error recovery + // Wait longer than flush interval (5 seconds) to ensure batch is flushed + select { + case received := <-ch: + assert.Equal(t, "test message", received) + case <-time.After(6 * time.Second): + t.Fatal("Timeout waiting for message") + } +} diff --git a/pkg/syncer/pubsub/redis_streams_test.go b/pkg/syncer/pubsub/redis_streams_test.go new file mode 100644 index 00000000..c4bf48d1 --- /dev/null +++ b/pkg/syncer/pubsub/redis_streams_test.go @@ -0,0 +1,343 @@ +/**************************************************************************** + * Copyright 2025 Optimizely, Inc. and contributors * + * * + * Licensed under the Apache License, Version 2.0 (the "License"); * + * you may not use this file except in compliance with the License. * + * You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, software * + * distributed under the License is distributed on an "AS IS" BASIS, * + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * + * See the License for the specific language governing permissions and * + * limitations under the License. * + ***************************************************************************/ + +package pubsub + +import ( + "context" + "encoding/json" + "testing" + "time" + + "github.com/go-redis/redis/v8" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const ( + testRedisHost = "localhost:6379" + testDatabase = 0 + testPassword = "" +) + +func setupRedisStreams() *RedisStreams { + return &RedisStreams{ + Host: testRedisHost, + Password: testPassword, + Database: testDatabase, + MaxLen: 1000, + ConsumerGroup: "test-group", + ConsumerName: "test-consumer", + BatchSize: 10, + FlushInterval: 5 * time.Second, + } +} + +func cleanupRedisStream(streamName string) { + client := redis.NewClient(&redis.Options{ + Addr: testRedisHost, + Password: testPassword, + DB: testDatabase, + }) + defer client.Close() + + // Delete the stream and consumer group + client.Del(context.Background(), streamName) +} + +func TestRedisStreams_Publish_String(t *testing.T) { + rs := setupRedisStreams() + ctx := context.Background() + channel := "test-channel-string" + message := "test message" + + defer cleanupRedisStream(rs.getStreamName(channel)) + + err := rs.Publish(ctx, channel, message) + assert.NoError(t, err) + + // Verify message was added to stream + client := redis.NewClient(&redis.Options{ + Addr: testRedisHost, + Password: testPassword, + DB: testDatabase, + }) + defer client.Close() + + streamName := rs.getStreamName(channel) + messages, err := client.XRange(ctx, streamName, "-", "+").Result() + require.NoError(t, err) + assert.Len(t, messages, 1) + + // Check message content + data, exists := messages[0].Values["data"] + assert.True(t, exists) + assert.Equal(t, message, data) + + // Check timestamp exists + timestamp, exists := messages[0].Values["timestamp"] + assert.True(t, exists) + assert.NotNil(t, timestamp) +} + +func TestRedisStreams_Publish_JSON(t *testing.T) { + rs := setupRedisStreams() + ctx := context.Background() + channel := "test-channel-json" + + testObj := map[string]interface{}{ + "type": "notification", + "payload": "test data", + "id": 123, + } + + defer cleanupRedisStream(rs.getStreamName(channel)) + + err := rs.Publish(ctx, channel, testObj) + assert.NoError(t, err) + + // Verify message was serialized correctly + client := redis.NewClient(&redis.Options{ + Addr: testRedisHost, + Password: testPassword, + DB: testDatabase, + }) + defer client.Close() + + streamName := rs.getStreamName(channel) + messages, err := client.XRange(ctx, streamName, "-", "+").Result() + require.NoError(t, err) + assert.Len(t, messages, 1) + + // Check JSON was stored correctly + data, exists := messages[0].Values["data"] + assert.True(t, exists) + + var decoded map[string]interface{} + err = json.Unmarshal([]byte(data.(string)), &decoded) + require.NoError(t, err) + assert.Equal(t, testObj["type"], decoded["type"]) + assert.Equal(t, testObj["payload"], decoded["payload"]) + assert.Equal(t, float64(123), decoded["id"]) // JSON numbers become float64 +} + +func TestRedisStreams_Publish_ByteArray(t *testing.T) { + rs := setupRedisStreams() + ctx := context.Background() + channel := "test-channel-bytes" + message := []byte("test byte message") + + defer cleanupRedisStream(rs.getStreamName(channel)) + + err := rs.Publish(ctx, channel, message) + assert.NoError(t, err) + + // Verify message was stored as string + client := redis.NewClient(&redis.Options{ + Addr: testRedisHost, + Password: testPassword, + DB: testDatabase, + }) + defer client.Close() + + streamName := rs.getStreamName(channel) + messages, err := client.XRange(ctx, streamName, "-", "+").Result() + require.NoError(t, err) + assert.Len(t, messages, 1) + + data, exists := messages[0].Values["data"] + assert.True(t, exists) + assert.Equal(t, string(message), data) +} + +func TestRedisStreams_Subscribe_BasicFlow(t *testing.T) { + rs := setupRedisStreams() + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + channel := "test-channel-subscribe" + defer cleanupRedisStream(rs.getStreamName(channel)) + + // Start subscriber + ch, err := rs.Subscribe(ctx, channel) + require.NoError(t, err) + + // Give subscriber time to set up + time.Sleep(100 * time.Millisecond) + + // Publish a message AFTER subscriber is ready + testMessage := "subscription test message" + err = rs.Publish(ctx, channel, testMessage) + require.NoError(t, err) + + // Wait for message (longer than flush interval to ensure batch is flushed) + select { + case received := <-ch: + assert.Equal(t, testMessage, received) + case <-time.After(6 * time.Second): + t.Fatal("Timeout waiting for message") + } +} + +func TestRedisStreams_Subscribe_MultipleMessages(t *testing.T) { + rs := setupRedisStreams() + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + channel := "test-channel-multiple" + defer cleanupRedisStream(rs.getStreamName(channel)) + + // Start subscriber + ch, err := rs.Subscribe(ctx, channel) + require.NoError(t, err) + + // Give subscriber time to set up + time.Sleep(100 * time.Millisecond) + + // Publish multiple messages AFTER subscriber is ready + messages := []string{"message1", "message2", "message3"} + for _, msg := range messages { + err = rs.Publish(ctx, channel, msg) + require.NoError(t, err) + } + + // Collect received messages + // Wait longer than flush interval (5 seconds) to ensure batch is flushed + var received []string + timeout := time.After(6 * time.Second) + + for i := 0; i < len(messages); i++ { + select { + case msg := <-ch: + received = append(received, msg) + case <-timeout: + t.Fatalf("Timeout waiting for message %d", i+1) + } + } + + assert.ElementsMatch(t, messages, received) +} + +func TestRedisStreams_HelperMethods(t *testing.T) { + rs := setupRedisStreams() + + // Test getStreamName + channel := "test-channel" + expected := "stream:test-channel" + assert.Equal(t, expected, rs.getStreamName(channel)) + + // Test getConsumerGroup + assert.Equal(t, "test-group", rs.getConsumerGroup()) + + // Test getConsumerGroup with empty value + rs.ConsumerGroup = "" + assert.Equal(t, "notifications", rs.getConsumerGroup()) + + // Test getConsumerName + rs.ConsumerName = "custom-consumer" + assert.Equal(t, "custom-consumer", rs.getConsumerName()) + + // Test getConsumerName with empty value (should generate unique name) + rs.ConsumerName = "" + name1 := rs.getConsumerName() + assert.Contains(t, name1, "consumer-") + // Note: getConsumerName generates the same name unless we create a new instance + + // Test getBatchSize + assert.Equal(t, 10, rs.getBatchSize()) + rs.BatchSize = 0 + assert.Equal(t, 10, rs.getBatchSize()) // Default + rs.BatchSize = -5 + assert.Equal(t, 10, rs.getBatchSize()) // Default for negative + + // Test getFlushInterval + rs.FlushInterval = 3 * time.Second + assert.Equal(t, 3*time.Second, rs.getFlushInterval()) + rs.FlushInterval = 0 + assert.Equal(t, 5*time.Second, rs.getFlushInterval()) // Default + rs.FlushInterval = -1 * time.Second + assert.Equal(t, 5*time.Second, rs.getFlushInterval()) // Default for negative +} + +func TestRedisStreams_Batching_Behavior(t *testing.T) { + rs := setupRedisStreams() + rs.BatchSize = 3 // Set small batch size for testing + rs.FlushInterval = 10 * time.Second // Long interval to test batch size trigger + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + channel := "test-channel-batching" + defer cleanupRedisStream(rs.getStreamName(channel)) + + // Start subscriber + ch, err := rs.Subscribe(ctx, channel) + require.NoError(t, err) + + // Publish messages to trigger batch + messages := []string{"batch1", "batch2", "batch3"} + for _, msg := range messages { + err = rs.Publish(ctx, channel, msg) + require.NoError(t, err) + } + + // Should receive all messages in one batch + var received []string + timeout := time.After(3 * time.Second) + + for len(received) < len(messages) { + select { + case msg := <-ch: + received = append(received, msg) + case <-timeout: + t.Fatalf("Timeout waiting for batched messages. Received %d out of %d", len(received), len(messages)) + } + } + + assert.ElementsMatch(t, messages, received) +} + +func TestRedisStreams_MaxLen_Configuration(t *testing.T) { + rs := setupRedisStreams() + rs.MaxLen = 2 // Very small max length + + ctx := context.Background() + channel := "test-channel-maxlen" + defer cleanupRedisStream(rs.getStreamName(channel)) + + // Publish more messages than MaxLen + messages := []string{"msg1", "msg2", "msg3", "msg4"} + for _, msg := range messages { + err := rs.Publish(ctx, channel, msg) + require.NoError(t, err) + } + + // Verify stream was trimmed + client := redis.NewClient(&redis.Options{ + Addr: testRedisHost, + Password: testPassword, + DB: testDatabase, + }) + defer client.Close() + + streamName := rs.getStreamName(channel) + length, err := client.XLen(ctx, streamName).Result() + require.NoError(t, err) + + // Should be approximately MaxLen (Redis uses approximate trimming) + // With APPROX, Redis may keep more entries than specified + assert.LessOrEqual(t, length, int64(10)) // Allow generous buffer for approximate trimming +} diff --git a/pkg/syncer/pubsub/redis_version.go b/pkg/syncer/pubsub/redis_version.go new file mode 100644 index 00000000..254f6e62 --- /dev/null +++ b/pkg/syncer/pubsub/redis_version.go @@ -0,0 +1,105 @@ +/**************************************************************************** + * Copyright 2025 Optimizely, Inc. and contributors * + * * + * Licensed under the Apache License, Version 2.0 (the "License"); * + * you may not use this file except in compliance with the License. * + * You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, software * + * distributed under the License is distributed on an "AS IS" BASIS, * + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * + * See the License for the specific language governing permissions and * + * limitations under the License. * + ***************************************************************************/ + +// Package pubsub provides pubsub functionality for the agent syncer +package pubsub + +import ( + "context" + "fmt" + "strconv" + "strings" + "time" + + "github.com/go-redis/redis/v8" + "github.com/rs/zerolog/log" +) + +const ( + // MinRedisVersionForStreams is the minimum Redis version that supports Streams + MinRedisVersionForStreams = 5 + // RedisVersionCheckTimeout is the timeout for version detection + RedisVersionCheckTimeout = 5 * time.Second +) + +// detectRedisMajorVersion attempts to detect the major version of Redis +// Returns the major version number (e.g., 6 for Redis 6.2.5) or 0 on error +func detectRedisMajorVersion(client *redis.Client) (int, error) { + ctx, cancel := context.WithTimeout(context.Background(), RedisVersionCheckTimeout) + defer cancel() + + // Try to get server info + info, err := client.Info(ctx, "server").Result() + if err != nil { + return 0, fmt.Errorf("failed to execute INFO server command: %w", err) + } + + // Parse version from "redis_version:x.y.z" + // Expected format: "redis_version:6.2.5" or "redis_version:7.0.0" + for _, line := range strings.Split(info, "\r\n") { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "redis_version:") { + version := strings.TrimPrefix(line, "redis_version:") + version = strings.TrimSpace(version) + + // Split on dots: "6.2.5" -> ["6", "2", "5"] + parts := strings.Split(version, ".") + if len(parts) < 1 { + return 0, fmt.Errorf("invalid version format: %s", version) + } + + // Parse major version (first part) + major, err := strconv.Atoi(parts[0]) + if err != nil { + return 0, fmt.Errorf("failed to parse major version from %s: %w", version, err) + } + + if major < 2 { + return 0, fmt.Errorf("unsupported Redis version: %d (minimum supported: 2)", major) + } + + return major, nil + } + } + + return 0, fmt.Errorf("redis_version field not found in INFO server output") +} + +// SupportsRedisStreams checks if the connected Redis instance supports Streams +// Returns true if Redis >= 5.0, false otherwise +// Falls back to false on any error (safe default) +func SupportsRedisStreams(client *redis.Client) bool { + majorVersion, err := detectRedisMajorVersion(client) + if err != nil { + // Detection failed - log warning and return false (safe fallback) + log.Warn().Err(err).Msg("Could not detect Redis version - will use Pub/Sub as safe fallback") + return false + } + + supported := majorVersion >= MinRedisVersionForStreams + if supported { + log.Info(). + Int("redis_version", majorVersion). + Msg("Redis Streams supported - will use Streams for notifications") + } else { + log.Info(). + Int("redis_version", majorVersion). + Int("min_required", MinRedisVersionForStreams). + Msg("Redis Streams not supported - will use Pub/Sub for notifications") + } + + return supported +} diff --git a/pkg/syncer/pubsub/redis_version_test.go b/pkg/syncer/pubsub/redis_version_test.go new file mode 100644 index 00000000..514d2339 --- /dev/null +++ b/pkg/syncer/pubsub/redis_version_test.go @@ -0,0 +1,279 @@ +/**************************************************************************** + * Copyright 2025 Optimizely, Inc. and contributors * + * * + * Licensed under the Apache License, Version 2.0 (the "License"); * + * you may not use this file except in compliance with the License. * + * You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, software * + * distributed under the License is distributed on an "AS IS" BASIS, * + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * + * See the License for the specific language governing permissions and * + * limitations under the License. * + ***************************************************************************/ + +// Package pubsub provides pubsub functionality for the agent syncer +package pubsub + +import ( + "context" + "testing" + + "github.com/go-redis/redis/v8" + "github.com/go-redis/redismock/v8" +) + +func TestDetectRedisMajorVersion(t *testing.T) { + tests := []struct { + name string + infoOutput string + infoError error + wantVersion int + wantErr bool + }{ + { + name: "Redis 6.2.5", + infoOutput: "# Server\r\nredis_version:6.2.5\r\nredis_git_sha1:00000000\r\nredis_git_dirty:0\r\nredis_build_id:1234567890\r\nredis_mode:standalone\r\nos:Linux 5.10.0-8-amd64 x86_64\r\narch_bits:64", + wantVersion: 6, + wantErr: false, + }, + { + name: "Redis 7.0.0", + infoOutput: "# Server\r\nredis_version:7.0.0\r\nredis_git_sha1:00000000", + wantVersion: 7, + wantErr: false, + }, + { + name: "Redis 5.0.14", + infoOutput: "# Server\r\nredis_version:5.0.14\r\nredis_git_sha1:00000000", + wantVersion: 5, + wantErr: false, + }, + { + name: "Redis 4.0.14", + infoOutput: "# Server\r\nredis_version:4.0.14\r\nredis_git_sha1:00000000", + wantVersion: 4, + wantErr: false, + }, + { + name: "Redis 3.2.12", + infoOutput: "# Server\r\nredis_version:3.2.12\r\nredis_git_sha1:00000000", + wantVersion: 3, + wantErr: false, + }, + { + name: "Redis 2.8.24", + infoOutput: "# Server\r\nredis_version:2.8.24\r\nredis_git_sha1:00000000", + wantVersion: 2, + wantErr: false, + }, + { + name: "Version with extra whitespace", + infoOutput: "# Server\r\nredis_version: 6.2.5\r\nredis_git_sha1:00000000", + wantVersion: 6, + wantErr: false, + }, + { + name: "Version field missing", + infoOutput: "# Server\r\nredis_git_sha1:00000000\r\nredis_git_dirty:0", + wantVersion: 0, + wantErr: true, + }, + { + name: "Invalid version format (no dots)", + infoOutput: "# Server\r\nredis_version:6\r\nredis_git_sha1:00000000", + wantVersion: 6, + wantErr: false, + }, + { + name: "Invalid version format (non-numeric)", + infoOutput: "# Server\r\nredis_version:abc.def.ghi\r\nredis_git_sha1:00000000", + wantVersion: 0, + wantErr: true, + }, + { + name: "Unsupported Redis 1.x", + infoOutput: "# Server\r\nredis_version:1.2.6\r\nredis_git_sha1:00000000", + wantVersion: 0, + wantErr: true, + }, + { + name: "INFO command blocked (NOPERM)", + infoOutput: "", + infoError: redis.NewCmd(context.Background()).Err(), + wantVersion: 0, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create mock Redis client + db, mock := redismock.NewClientMock() + defer db.Close() + + // Setup mock expectation + if tt.infoError != nil { + mock.ExpectInfo("server").SetErr(tt.infoError) + } else { + mock.ExpectInfo("server").SetVal(tt.infoOutput) + } + + // Run test + got, err := detectRedisMajorVersion(db) + + // Check expectations were met + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("Redis mock expectations not met: %v", err) + } + + // Verify results + if (err != nil) != tt.wantErr { + t.Errorf("detectRedisMajorVersion() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.wantVersion { + t.Errorf("detectRedisMajorVersion() = %v, want %v", got, tt.wantVersion) + } + }) + } +} + +func TestSupportsRedisStreams(t *testing.T) { + tests := []struct { + name string + infoOutput string + infoError error + wantSupport bool + }{ + { + name: "Redis 7.x supports Streams", + infoOutput: "# Server\r\nredis_version:7.0.0\r\nredis_git_sha1:00000000", + wantSupport: true, + }, + { + name: "Redis 6.x supports Streams", + infoOutput: "# Server\r\nredis_version:6.2.5\r\nredis_git_sha1:00000000", + wantSupport: true, + }, + { + name: "Redis 5.x supports Streams", + infoOutput: "# Server\r\nredis_version:5.0.14\r\nredis_git_sha1:00000000", + wantSupport: true, + }, + { + name: "Redis 4.x does not support Streams", + infoOutput: "# Server\r\nredis_version:4.0.14\r\nredis_git_sha1:00000000", + wantSupport: false, + }, + { + name: "Redis 3.x does not support Streams", + infoOutput: "# Server\r\nredis_version:3.2.12\r\nredis_git_sha1:00000000", + wantSupport: false, + }, + { + name: "Redis 2.x does not support Streams", + infoOutput: "# Server\r\nredis_version:2.8.24\r\nredis_git_sha1:00000000", + wantSupport: false, + }, + { + name: "Detection failure falls back to false (safe default)", + infoOutput: "", + infoError: redis.NewCmd(context.Background()).Err(), + wantSupport: false, + }, + { + name: "Invalid version falls back to false", + infoOutput: "# Server\r\nredis_version:invalid\r\nredis_git_sha1:00000000", + wantSupport: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create mock Redis client + db, mock := redismock.NewClientMock() + defer db.Close() + + // Setup mock expectation + if tt.infoError != nil { + mock.ExpectInfo("server").SetErr(tt.infoError) + } else { + mock.ExpectInfo("server").SetVal(tt.infoOutput) + } + + // Run test + got := SupportsRedisStreams(db) + + // Check expectations were met + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("Redis mock expectations not met: %v", err) + } + + // Verify results + if got != tt.wantSupport { + t.Errorf("SupportsRedisStreams() = %v, want %v", got, tt.wantSupport) + } + }) + } +} + +// TestSupportsRedisStreams_NoPanic ensures the function never panics +func TestSupportsRedisStreams_NoPanic(t *testing.T) { + // Test various error conditions to ensure no panic + testCases := []struct { + name string + infoOutput string + wantResult bool + }{ + { + name: "Empty response", + infoOutput: "", + wantResult: false, + }, + { + name: "Invalid format", + infoOutput: "invalid data", + wantResult: false, + }, + { + name: "Empty version", + infoOutput: "# Server\r\nredis_version:\r\nredis_git_sha1:00000000", + wantResult: false, + }, + { + name: "Version with just newline", + infoOutput: "# Server\r\nredis_version:\r\nredis_git_sha1:00000000", + wantResult: false, + }, + { + name: "Invalid dots", + infoOutput: "# Server\r\nredis_version:.....\r\nredis_git_sha1:00000000", + wantResult: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + db, mock := redismock.NewClientMock() + defer db.Close() + + mock.ExpectInfo("server").SetVal(tc.infoOutput) + + // This should not panic + result := SupportsRedisStreams(db) + + // Verify expectations were met + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("Redis mock expectations not met: %v", err) + } + + // Should always fall back to false on any error + if result != tc.wantResult { + t.Errorf("Expected %v for malformed input, got %v", tc.wantResult, result) + } + }) + } +} diff --git a/pkg/syncer/pubsub_test.go b/pkg/syncer/pubsub_test.go index 31b3dc1d..1b86c164 100644 --- a/pkg/syncer/pubsub_test.go +++ b/pkg/syncer/pubsub_test.go @@ -20,6 +20,7 @@ package syncer import ( "reflect" "testing" + "time" "github.com/optimizely/agent/config" "github.com/optimizely/agent/pkg/syncer/pubsub" @@ -28,7 +29,7 @@ import ( func TestNewPubSub(t *testing.T) { type args struct { conf config.SyncConfig - flag SycnFeatureFlag + flag SyncFeatureFlag } tests := []struct { name string @@ -52,12 +53,18 @@ func TestNewPubSub(t *testing.T) { Enable: true, }, }, - flag: SyncFeatureFlagNotificaiton, + flag: SyncFeatureFlagNotification, }, - want: &pubsub.Redis{ - Host: "localhost:6379", - Password: "", - Database: 0, + want: &pubsub.RedisStreams{ + Host: "localhost:6379", + Password: "", + Database: 0, + BatchSize: 10, + FlushInterval: 5 * time.Second, + MaxRetries: 3, + RetryDelay: 100 * time.Millisecond, + MaxRetryDelay: 5 * time.Second, + ConnTimeout: 10 * time.Second, }, wantErr: false, }, @@ -77,12 +84,18 @@ func TestNewPubSub(t *testing.T) { Enable: true, }, }, - flag: SycnFeatureFlagDatafile, + flag: SyncFeatureFlagDatafile, }, - want: &pubsub.Redis{ - Host: "localhost:6379", - Password: "", - Database: 0, + want: &pubsub.RedisStreams{ + Host: "localhost:6379", + Password: "", + Database: 0, + BatchSize: 10, + FlushInterval: 5 * time.Second, + MaxRetries: 3, + RetryDelay: 100 * time.Millisecond, + MaxRetryDelay: 5 * time.Second, + ConnTimeout: 10 * time.Second, }, wantErr: false, }, @@ -98,7 +111,7 @@ func TestNewPubSub(t *testing.T) { Enable: true, }, }, - flag: SyncFeatureFlagNotificaiton, + flag: SyncFeatureFlagNotification, }, want: nil, wantErr: true, @@ -115,7 +128,7 @@ func TestNewPubSub(t *testing.T) { Enable: true, }, }, - flag: SyncFeatureFlagNotificaiton, + flag: SyncFeatureFlagNotification, }, want: nil, wantErr: true, @@ -132,7 +145,7 @@ func TestNewPubSub(t *testing.T) { Enable: true, }, }, - flag: SyncFeatureFlagNotificaiton, + flag: SyncFeatureFlagNotification, }, want: nil, wantErr: true, @@ -153,7 +166,7 @@ func TestNewPubSub(t *testing.T) { Enable: true, }, }, - flag: SyncFeatureFlagNotificaiton, + flag: SyncFeatureFlagNotification, }, want: nil, wantErr: true, @@ -173,13 +186,13 @@ func TestNewPubSub(t *testing.T) { Enable: true, }, }, - flag: SyncFeatureFlagNotificaiton, + flag: SyncFeatureFlagNotification, }, want: nil, wantErr: true, }, { - name: "Test with invalid redis config without password", + name: "Test with valid redis config without password", args: args{ conf: config.SyncConfig{ Pubsub: map[string]interface{}{ @@ -193,10 +206,20 @@ func TestNewPubSub(t *testing.T) { Enable: true, }, }, - flag: SyncFeatureFlagNotificaiton, + flag: SyncFeatureFlagNotification, }, - want: nil, - wantErr: true, + want: &pubsub.RedisStreams{ + Host: "localhost:6379", + Password: "", // Empty password is valid (no auth required) + Database: 0, + BatchSize: 10, + FlushInterval: 5 * time.Second, + MaxRetries: 3, + RetryDelay: 100 * time.Millisecond, + MaxRetryDelay: 5 * time.Second, + ConnTimeout: 10 * time.Second, + }, + wantErr: false, }, { name: "Test with invalid redis config without db", @@ -213,19 +236,19 @@ func TestNewPubSub(t *testing.T) { Enable: true, }, }, - flag: SyncFeatureFlagNotificaiton, + flag: SyncFeatureFlagNotification, }, want: nil, wantErr: true, }, { - name: "Test with invalid redis config with invalid password", + name: "Test with redis config with invalid password type (ignored)", args: args{ conf: config.SyncConfig{ Pubsub: map[string]interface{}{ "redis": map[string]interface{}{ "host": "localhost:6379", - "password": 1234, + "password": 1234, // Invalid type, will be ignored "database": 0, }, }, @@ -234,10 +257,20 @@ func TestNewPubSub(t *testing.T) { Enable: true, }, }, - flag: SyncFeatureFlagNotificaiton, + flag: SyncFeatureFlagNotification, }, - want: nil, - wantErr: true, + want: &pubsub.RedisStreams{ + Host: "localhost:6379", + Password: "", // Invalid type ignored, falls back to empty string + Database: 0, + BatchSize: 10, + FlushInterval: 5 * time.Second, + MaxRetries: 3, + RetryDelay: 100 * time.Millisecond, + MaxRetryDelay: 5 * time.Second, + ConnTimeout: 10 * time.Second, + }, + wantErr: false, }, { name: "Test with invalid redis config with invalid database", @@ -255,7 +288,117 @@ func TestNewPubSub(t *testing.T) { Enable: true, }, }, - flag: SyncFeatureFlagNotificaiton, + flag: SyncFeatureFlagNotification, + }, + want: nil, + wantErr: true, + }, + { + name: "Test with auto-detected redis-streams with custom config", + args: args{ + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + "database": 0, + "batch_size": 20, + "flush_interval": "10s", + "max_retries": 5, + "retry_delay": "200ms", + "max_retry_delay": "10s", + "connection_timeout": "15s", + }, + }, + Notification: config.FeatureSyncConfig{ + Default: "redis", + Enable: true, + }, + }, + flag: SyncFeatureFlagNotification, + }, + want: &pubsub.RedisStreams{ + Host: "localhost:6379", + Password: "", + Database: 0, + BatchSize: 20, + FlushInterval: 10000000000, // 10s in nanoseconds + MaxRetries: 5, + RetryDelay: 200000000, // 200ms in nanoseconds + MaxRetryDelay: 10000000000, // 10s in nanoseconds + ConnTimeout: 15000000000, // 15s in nanoseconds + }, + wantErr: false, + }, + { + name: "Test with auto-detected redis-streams for datafile", + args: args{ + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + "database": 0, + }, + }, + Datafile: config.FeatureSyncConfig{ + Default: "redis", + Enable: true, + }, + }, + flag: SyncFeatureFlagDatafile, + }, + want: &pubsub.RedisStreams{ + Host: "localhost:6379", + Password: "", + Database: 0, + BatchSize: 10, // default + FlushInterval: 5000000000, // 5s default in nanoseconds + MaxRetries: 3, // default + RetryDelay: 100000000, // 100ms default in nanoseconds + MaxRetryDelay: 5000000000, // 5s default in nanoseconds + ConnTimeout: 10000000000, // 10s default in nanoseconds + }, + wantErr: false, + }, + { + name: "Test with unsupported pubsub type", + args: args{ + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + "database": 0, + }, + }, + Notification: config.FeatureSyncConfig{ + Default: "unsupported-type", + Enable: true, + }, + }, + flag: SyncFeatureFlagNotification, + }, + want: nil, + wantErr: true, + }, + { + name: "Test with invalid feature flag", + args: args{ + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + "database": 0, + }, + }, + Notification: config.FeatureSyncConfig{ + Default: "redis", + Enable: true, + }, + }, + flag: "invalid-flag", }, want: nil, wantErr: true, @@ -274,3 +417,585 @@ func TestNewPubSub(t *testing.T) { }) } } + +func TestGetIntFromConfig(t *testing.T) { + tests := []struct { + name string + config map[string]interface{} + key string + defaultValue int + want int + }{ + { + name: "Valid int value", + config: map[string]interface{}{ + "test_key": 42, + }, + key: "test_key", + defaultValue: 10, + want: 42, + }, + { + name: "Missing key returns default", + config: map[string]interface{}{ + "other_key": 42, + }, + key: "test_key", + defaultValue: 10, + want: 10, + }, + { + name: "Invalid type returns default", + config: map[string]interface{}{ + "test_key": "not an int", + }, + key: "test_key", + defaultValue: 10, + want: 10, + }, + { + name: "Nil value returns default", + config: map[string]interface{}{ + "test_key": nil, + }, + key: "test_key", + defaultValue: 10, + want: 10, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := getIntFromConfig(tt.config, tt.key, tt.defaultValue) + if got != tt.want { + t.Errorf("getIntFromConfig() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestGetDurationFromConfig(t *testing.T) { + tests := []struct { + name string + config map[string]interface{} + key string + defaultValue time.Duration + want time.Duration + }{ + { + name: "Valid duration string", + config: map[string]interface{}{ + "test_key": "5s", + }, + key: "test_key", + defaultValue: 1 * time.Second, + want: 5 * time.Second, + }, + { + name: "Valid millisecond duration", + config: map[string]interface{}{ + "test_key": "100ms", + }, + key: "test_key", + defaultValue: 1 * time.Second, + want: 100 * time.Millisecond, + }, + { + name: "Missing key returns default", + config: map[string]interface{}{ + "other_key": "5s", + }, + key: "test_key", + defaultValue: 1 * time.Second, + want: 1 * time.Second, + }, + { + name: "Invalid duration string returns default", + config: map[string]interface{}{ + "test_key": "invalid duration", + }, + key: "test_key", + defaultValue: 1 * time.Second, + want: 1 * time.Second, + }, + { + name: "Non-string value returns default", + config: map[string]interface{}{ + "test_key": 123, + }, + key: "test_key", + defaultValue: 1 * time.Second, + want: 1 * time.Second, + }, + { + name: "Nil value returns default", + config: map[string]interface{}{ + "test_key": nil, + }, + key: "test_key", + defaultValue: 1 * time.Second, + want: 1 * time.Second, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := getDurationFromConfig(tt.config, tt.key, tt.defaultValue) + if got != tt.want { + t.Errorf("getDurationFromConfig() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestNewPubSub_DatabaseTypeConversion(t *testing.T) { + tests := []struct { + name string + database interface{} + wantErr bool + }{ + { + name: "database as int", + database: 0, + wantErr: false, + }, + { + name: "database as float64 (from YAML/JSON)", + database: float64(0), + wantErr: false, + }, + { + name: "database as float64 non-zero", + database: float64(1), + wantErr: false, + }, + { + name: "database as string - should fail", + database: "0", + wantErr: true, + }, + { + name: "database as nil - should fail", + database: nil, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + conf := config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + "database": tt.database, + }, + }, + Notification: config.FeatureSyncConfig{ + Default: "redis", + Enable: true, + }, + } + + _, err := newPubSub(conf, SyncFeatureFlagNotification) + if (err != nil) != tt.wantErr { + t.Errorf("newPubSub() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + +func TestGetPubSubRedis_DirectCall(t *testing.T) { + tests := []struct { + name string + conf config.SyncConfig + want PubSub + wantErr bool + }{ + { + name: "Valid Pub/Sub config", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + "database": 0, + }, + }, + }, + want: &pubsub.Redis{ + Host: "localhost:6379", + Password: "", + Database: 0, + }, + wantErr: false, + }, + { + name: "Pub/Sub with database as float64", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "secret", + "database": float64(1), + }, + }, + }, + want: &pubsub.Redis{ + Host: "localhost:6379", + Password: "secret", + Database: 1, + }, + wantErr: false, + }, + { + name: "Pub/Sub config not found", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "not-redis": map[string]interface{}{}, + }, + }, + want: nil, + wantErr: true, + }, + { + name: "Pub/Sub config not valid (not a map)", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": "invalid-config", + }, + }, + want: nil, + wantErr: true, + }, + { + name: "Pub/Sub host not found", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "password": "", + "database": 0, + }, + }, + }, + want: nil, + wantErr: true, + }, + { + name: "Pub/Sub host not valid (not a string)", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": 123, + "password": "", + "database": 0, + }, + }, + }, + want: nil, + wantErr: true, + }, + { + name: "Pub/Sub database not found", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + }, + }, + }, + want: nil, + wantErr: true, + }, + { + name: "Pub/Sub database invalid type", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + "database": "invalid", + }, + }, + }, + want: nil, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := getPubSubRedis(tt.conf) + if (err != nil) != tt.wantErr { + t.Errorf("getPubSubRedis() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("getPubSubRedis() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestGetPubSubRedisStreams_DirectCall(t *testing.T) { + tests := []struct { + name string + conf config.SyncConfig + wantErr bool + }{ + { + name: "Streams config not found", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "not-redis": map[string]interface{}{}, + }, + }, + wantErr: true, + }, + { + name: "Streams config not valid (not a map)", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": "invalid-config", + }, + }, + wantErr: true, + }, + { + name: "Streams host not found", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "password": "", + "database": 0, + }, + }, + }, + wantErr: true, + }, + { + name: "Streams host not valid (not a string)", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": 123, + "password": "", + "database": 0, + }, + }, + }, + wantErr: true, + }, + { + name: "Streams database not found", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + }, + }, + }, + wantErr: true, + }, + { + name: "Streams database as float64 (valid)", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + "database": float64(1), + }, + }, + }, + wantErr: false, + }, + { + name: "Streams database invalid type", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + "database": "invalid", + }, + }, + }, + wantErr: true, + }, + { + name: "Streams with valid config", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + "database": 0, + }, + }, + }, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := getPubSubRedisStreams(tt.conf) + if (err != nil) != tt.wantErr { + t.Errorf("getPubSubRedisStreams() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + +func TestGetPubSubRedisStreams_ErrorPaths(t *testing.T) { + tests := []struct { + name string + conf config.SyncConfig + wantErr bool + }{ + { + name: "redis config not found", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "not-redis": map[string]interface{}{}, + }, + Notification: config.FeatureSyncConfig{ + Default: "redis", + Enable: true, + }, + }, + wantErr: true, + }, + { + name: "redis config not valid (not a map)", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": "invalid-config", + }, + Notification: config.FeatureSyncConfig{ + Default: "redis", + Enable: true, + }, + }, + wantErr: true, + }, + { + name: "redis host not found", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "password": "", + "database": 0, + }, + }, + Notification: config.FeatureSyncConfig{ + Default: "redis", + Enable: true, + }, + }, + wantErr: true, + }, + { + name: "redis host not valid (not a string)", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": 123, + "password": "", + "database": 0, + }, + }, + Notification: config.FeatureSyncConfig{ + Default: "redis", + Enable: true, + }, + }, + wantErr: true, + }, + { + name: "redis database not found", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + }, + }, + Notification: config.FeatureSyncConfig{ + Default: "redis", + Enable: true, + }, + }, + wantErr: true, + }, + { + name: "redis database as float64 (valid)", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + "database": float64(1), + }, + }, + Notification: config.FeatureSyncConfig{ + Default: "redis", + Enable: true, + }, + }, + wantErr: false, + }, + { + name: "redis database invalid type", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + "database": "invalid", + }, + }, + Notification: config.FeatureSyncConfig{ + Default: "redis", + Enable: true, + }, + }, + wantErr: true, + }, + { + name: "datafile with unsupported pubsub type", + conf: config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + "database": 0, + }, + }, + Datafile: config.FeatureSyncConfig{ + Default: "unsupported-type", + Enable: true, + }, + }, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var err error + if tt.conf.Notification.Default != "" { + _, err = newPubSub(tt.conf, SyncFeatureFlagNotification) + } else { + _, err = newPubSub(tt.conf, SyncFeatureFlagDatafile) + } + + if (err != nil) != tt.wantErr { + t.Errorf("newPubSub() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} diff --git a/pkg/syncer/syncer.go b/pkg/syncer/syncer.go index 7f2f35f1..0493b878 100644 --- a/pkg/syncer/syncer.go +++ b/pkg/syncer/syncer.go @@ -75,7 +75,7 @@ func NewSyncedNotificationCenter(ctx context.Context, sdkKey string, conf config logger = &log.Logger } - pubsub, err := newPubSub(conf, SyncFeatureFlagNotificaiton) + pubsub, err := newPubSub(conf, SyncFeatureFlagNotification) if err != nil { return nil, err } @@ -132,7 +132,7 @@ type DatafileSyncer struct { } func NewDatafileSyncer(conf config.SyncConfig) (*DatafileSyncer, error) { - pubsub, err := newPubSub(conf, SycnFeatureFlagDatafile) + pubsub, err := newPubSub(conf, SyncFeatureFlagDatafile) if err != nil { return nil, err } diff --git a/pkg/syncer/syncer_test.go b/pkg/syncer/syncer_test.go index 01f407f7..42f2128c 100644 --- a/pkg/syncer/syncer_test.go +++ b/pkg/syncer/syncer_test.go @@ -21,6 +21,7 @@ import ( "context" "reflect" "testing" + "time" "github.com/optimizely/agent/config" "github.com/optimizely/agent/pkg/syncer/pubsub" @@ -80,10 +81,16 @@ func TestNewSyncedNotificationCenter(t *testing.T) { ctx: context.Background(), logger: &log.Logger, sdkKey: "123", - pubsub: &pubsub.Redis{ - Host: "localhost:6379", - Password: "", - Database: 0, + pubsub: &pubsub.RedisStreams{ + Host: "localhost:6379", + Password: "", + Database: 0, + BatchSize: 10, + FlushInterval: 5 * time.Second, + MaxRetries: 3, + RetryDelay: 100 * time.Millisecond, + MaxRetryDelay: 5 * time.Second, + ConnTimeout: 10 * time.Second, }, }, wantErr: false, @@ -161,10 +168,16 @@ func TestNewDatafileSyncer(t *testing.T) { }, }, want: &DatafileSyncer{ - pubsub: &pubsub.Redis{ - Host: "localhost:6379", - Password: "", - Database: 0, + pubsub: &pubsub.RedisStreams{ + Host: "localhost:6379", + Password: "", + Database: 0, + BatchSize: 10, + FlushInterval: 5 * time.Second, + MaxRetries: 3, + RetryDelay: 100 * time.Millisecond, + MaxRetryDelay: 5 * time.Second, + ConnTimeout: 10 * time.Second, }, }, wantErr: false, @@ -384,3 +397,89 @@ func TestSyncedNotificationCenter_Subscribe(t *testing.T) { }) } } + +func TestNewSyncedNotificationCenter_CacheHit(t *testing.T) { + // Clear cache before test + ncCache = make(map[string]NotificationSyncer) + + conf := config.SyncConfig{ + Pubsub: map[string]interface{}{ + "redis": map[string]interface{}{ + "host": "localhost:6379", + "password": "", + "database": 0, + }, + }, + Notification: config.FeatureSyncConfig{ + Default: "redis", + Enable: true, + }, + } + + sdkKey := "test-sdk-key" + ctx := context.Background() + + // First call - should create new instance + nc1, err := NewSyncedNotificationCenter(ctx, sdkKey, conf) + assert.NoError(t, err) + assert.NotNil(t, nc1) + + // Second call with same sdkKey - should return cached instance + nc2, err := NewSyncedNotificationCenter(ctx, sdkKey, conf) + assert.NoError(t, err) + assert.NotNil(t, nc2) + + // Should be the same instance (cache hit) + assert.Equal(t, nc1, nc2) +} + +func TestSyncedNotificationCenter_AddHandler(t *testing.T) { + nc := &SyncedNotificationCenter{ + ctx: context.Background(), + logger: &log.Logger, + sdkKey: "test", + pubsub: &testPubSub{}, + } + + id, err := nc.AddHandler(notification.Decision, func(interface{}) {}) + assert.NoError(t, err) + assert.Equal(t, 0, id) +} + +func TestSyncedNotificationCenter_RemoveHandler(t *testing.T) { + nc := &SyncedNotificationCenter{ + ctx: context.Background(), + logger: &log.Logger, + sdkKey: "test", + pubsub: &testPubSub{}, + } + + err := nc.RemoveHandler(0, notification.Decision) + assert.NoError(t, err) +} + +func TestSyncedNotificationCenter_Send_MarshalError(t *testing.T) { + nc := &SyncedNotificationCenter{ + ctx: context.Background(), + logger: &log.Logger, + sdkKey: "test", + pubsub: &testPubSub{}, + } + + // Pass a channel which cannot be marshaled to JSON + ch := make(chan int) + err := nc.Send(notification.Decision, ch) + assert.Error(t, err) +} + +func TestGetDatafileSyncChannel(t *testing.T) { + result := GetDatafileSyncChannel() + expected := "optimizely-sync-datafile" + assert.Equal(t, expected, result) +} + +func TestGetChannelForSDKKey(t *testing.T) { + result := GetChannelForSDKKey("test-channel", "sdk-123") + expected := "test-channel-sdk-123" + assert.Equal(t, expected, result) +} diff --git a/pkg/utils/redisauth/password.go b/pkg/utils/redisauth/password.go new file mode 100644 index 00000000..08bb2145 --- /dev/null +++ b/pkg/utils/redisauth/password.go @@ -0,0 +1,109 @@ +/**************************************************************************** + * Copyright 2025 Optimizely, Inc. and contributors * + * * + * Licensed under the Apache License, Version 2.0 (the "License"); * + * you may not use this file except in compliance with the License. * + * You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, software * + * distributed under the License is distributed on an "AS IS" BASIS, * + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * + * See the License for the specific language governing permissions and * + * limitations under the License. * + ***************************************************************************/ + +// Package redisauth provides utilities for Redis authentication configuration +package redisauth + +import ( + "encoding/json" + "os" +) + +// GetPassword safely extracts Redis password from config with flexible field names and env var fallback +// +// Supports multiple field names to avoid security scanning alerts on "password" keyword: +// - auth_token (preferred) +// - redis_secret (alternative) +// - password (legacy support) +// +// If no config field is found or all are empty, falls back to environment variable. +// Returns empty string if no password is configured (valid for Redis without auth). +// +// Parameters: +// - config: map containing Redis configuration +// - envVar: environment variable name to check as fallback (e.g., "REDIS_PASSWORD") +// +// Returns: +// - password string (may be empty for Redis without authentication) +func GetPassword(config map[string]interface{}, envVar string) string { + // Try each key in order of preference + keys := []string{"auth_token", "redis_secret", "password"} + + for _, key := range keys { + if val, found := config[key]; found { + if strVal, ok := val.(string); ok && strVal != "" { + return strVal + } + } + } + + // Fallback to environment variable + if envVar != "" { + if envVal := os.Getenv(envVar); envVal != "" { + return envVal + } + } + + // Return empty string if not found (for Redis, empty password is valid) + return "" +} + +// UnmarshalWithPasswordExtraction provides shared logic for unmarshaling Redis configurations +// with flexible password field name support. +// +// This helper function handles the common pattern used across Redis cache implementations: +// 1. Unmarshal JSON data into the provided target struct (using alias to avoid recursion) +// 2. Extract password from flexible field names (auth_token, redis_secret, password) +// 3. Return the extracted password for the caller to set on their struct +// +// Parameters: +// - data: JSON bytes to unmarshal +// - target: pointer to the struct to unmarshal into (should be an alias type to avoid recursion) +// - envVar: environment variable name for password fallback (e.g., "REDIS_ODP_PASSWORD") +// +// Returns: +// - password: extracted password string (may be empty) +// - error: any error from unmarshaling +// +// Example usage in a struct's UnmarshalJSON method: +// +// func (r *RedisCache) UnmarshalJSON(data []byte) error { +// type Alias RedisCache +// alias := (*Alias)(r) +// password, err := redisauth.UnmarshalWithPasswordExtraction(data, alias, "REDIS_PASSWORD") +// if err != nil { +// return err +// } +// r.Password = password +// return nil +// } +func UnmarshalWithPasswordExtraction(data []byte, target interface{}, envVar string) (string, error) { + // Unmarshal normally to get all fields + if err := json.Unmarshal(data, target); err != nil { + return "", err + } + + // Parse raw config to extract password with flexible field names + var rawConfig map[string]interface{} + if err := json.Unmarshal(data, &rawConfig); err != nil { + return "", err + } + + // Use GetPassword to extract password from flexible field names or env var + password := GetPassword(rawConfig, envVar) + + return password, nil +} diff --git a/pkg/utils/redisauth/password_test.go b/pkg/utils/redisauth/password_test.go new file mode 100644 index 00000000..9a755f6c --- /dev/null +++ b/pkg/utils/redisauth/password_test.go @@ -0,0 +1,403 @@ +/**************************************************************************** + * Copyright 2025 Optimizely, Inc. and contributors * + * * + * Licensed under the Apache License, Version 2.0 (the "License"); * + * you may not use this file except in compliance with the License. * + * You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, software * + * distributed under the License is distributed on an "AS IS" BASIS, * + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * + * See the License for the specific language governing permissions and * + * limitations under the License. * + ***************************************************************************/ + +package redisauth + +import ( + "encoding/json" + "os" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGetPassword(t *testing.T) { + tests := []struct { + name string + config map[string]interface{} + envVar string + envValue string + want string + }{ + { + name: "auth_token has highest priority", + config: map[string]interface{}{ + "auth_token": "token123", + "redis_secret": "secret456", + "password": "password789", + }, + envVar: "TEST_ENV", + want: "token123", + }, + { + name: "redis_secret used when auth_token missing", + config: map[string]interface{}{ + "redis_secret": "secret456", + "password": "password789", + }, + envVar: "TEST_ENV", + want: "secret456", + }, + { + name: "password used when auth_token and redis_secret missing", + config: map[string]interface{}{ + "password": "password789", + }, + envVar: "TEST_ENV", + want: "password789", + }, + { + name: "environment variable used when no config fields present", + config: map[string]interface{}{ + "host": "localhost:6379", + "database": 0, + }, + envVar: "TEST_ENV", + envValue: "env_password", + want: "env_password", + }, + { + name: "empty string when no password configured", + config: map[string]interface{}{ + "host": "localhost:6379", + "database": 0, + }, + envVar: "TEST_ENV", + want: "", + }, + { + name: "empty field values are ignored", + config: map[string]interface{}{ + "auth_token": "", + "redis_secret": "", + "password": "password789", + }, + envVar: "TEST_ENV", + want: "password789", + }, + { + name: "non-string values are ignored", + config: map[string]interface{}{ + "auth_token": 12345, // Invalid type + "password": "password789", + }, + envVar: "TEST_ENV", + want: "password789", + }, + { + name: "config fields take precedence over env var", + config: map[string]interface{}{ + "password": "config_password", + }, + envVar: "TEST_ENV", + envValue: "env_password", + want: "config_password", + }, + { + name: "empty env var name is handled gracefully", + config: map[string]interface{}{ + "host": "localhost:6379", + }, + envVar: "", + want: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Set environment variable if needed + if tt.envValue != "" { + os.Setenv(tt.envVar, tt.envValue) + defer os.Unsetenv(tt.envVar) + } else { + // Ensure env var is not set + os.Unsetenv(tt.envVar) + } + + got := GetPassword(tt.config, tt.envVar) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestGetPassword_RealWorldScenarios(t *testing.T) { + t.Run("Kubernetes secret via env var", func(t *testing.T) { + os.Setenv("REDIS_PASSWORD", "k8s-secret-value") + defer os.Unsetenv("REDIS_PASSWORD") + + config := map[string]interface{}{ + "host": "redis-service:6379", + "database": 0, + } + + got := GetPassword(config, "REDIS_PASSWORD") + assert.Equal(t, "k8s-secret-value", got) + }) + + t.Run("Development config without auth", func(t *testing.T) { + config := map[string]interface{}{ + "host": "localhost:6379", + "database": 0, + } + + got := GetPassword(config, "REDIS_PASSWORD") + assert.Equal(t, "", got) + }) + + t.Run("Production config with auth_token", func(t *testing.T) { + config := map[string]interface{}{ + "host": "redis.production.example.com:6379", + "auth_token": "prod-token-12345", + "database": 1, + } + + got := GetPassword(config, "REDIS_PASSWORD") + assert.Equal(t, "prod-token-12345", got) + }) + + t.Run("Legacy config with password field", func(t *testing.T) { + config := map[string]interface{}{ + "host": "legacy-redis:6379", + "password": "legacy-pass", + "database": 0, + } + + got := GetPassword(config, "REDIS_PASSWORD") + assert.Equal(t, "legacy-pass", got) + }) +} + +func TestUnmarshalWithPasswordExtraction(t *testing.T) { + // Test struct to unmarshal into + type TestRedisConfig struct { + Host string `json:"host"` + Password string `json:"password"` + Database int `json:"database"` + } + + tests := []struct { + name string + jsonData string + envVar string + envValue string + wantPassword string + wantErr bool + wantHost string + wantDatabase int + }{ + { + name: "unmarshal with auth_token field", + jsonData: `{"host":"localhost:6379","auth_token":"token123","database":0}`, + envVar: "TEST_REDIS_PASSWORD", + wantPassword: "token123", + wantErr: false, + wantHost: "localhost:6379", + wantDatabase: 0, + }, + { + name: "unmarshal with redis_secret field", + jsonData: `{"host":"localhost:6379","redis_secret":"secret456","database":1}`, + envVar: "TEST_REDIS_PASSWORD", + wantPassword: "secret456", + wantErr: false, + wantHost: "localhost:6379", + wantDatabase: 1, + }, + { + name: "unmarshal with password field", + jsonData: `{"host":"localhost:6379","password":"pass789","database":2}`, + envVar: "TEST_REDIS_PASSWORD", + wantPassword: "pass789", + wantErr: false, + wantHost: "localhost:6379", + wantDatabase: 2, + }, + { + name: "unmarshal with env var fallback", + jsonData: `{"host":"localhost:6379","database":0}`, + envVar: "TEST_REDIS_PASSWORD", + envValue: "env_password", + wantPassword: "env_password", + wantErr: false, + wantHost: "localhost:6379", + wantDatabase: 0, + }, + { + name: "unmarshal with no password configured", + jsonData: `{"host":"localhost:6379","database":0}`, + envVar: "TEST_REDIS_PASSWORD", + wantPassword: "", + wantErr: false, + wantHost: "localhost:6379", + wantDatabase: 0, + }, + { + name: "priority: auth_token over redis_secret", + jsonData: `{"host":"localhost:6379","auth_token":"token","redis_secret":"secret","password":"pass","database":0}`, + envVar: "TEST_REDIS_PASSWORD", + wantPassword: "token", + wantErr: false, + wantHost: "localhost:6379", + wantDatabase: 0, + }, + { + name: "invalid JSON returns error", + jsonData: `{"host":"localhost:6379","invalid json`, + envVar: "TEST_REDIS_PASSWORD", + wantPassword: "", + wantErr: true, + }, + { + name: "empty JSON object", + jsonData: `{}`, + envVar: "TEST_REDIS_PASSWORD", + wantPassword: "", + wantErr: false, + wantHost: "", + wantDatabase: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Set environment variable if needed + if tt.envValue != "" { + os.Setenv(tt.envVar, tt.envValue) + defer os.Unsetenv(tt.envVar) + } else { + os.Unsetenv(tt.envVar) + } + + // Create target struct + var config TestRedisConfig + + // Call the function + password, err := UnmarshalWithPasswordExtraction([]byte(tt.jsonData), &config, tt.envVar) + + // Check error + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + + // Verify password extraction + assert.Equal(t, tt.wantPassword, password) + + // Verify other fields were unmarshaled correctly + assert.Equal(t, tt.wantHost, config.Host) + assert.Equal(t, tt.wantDatabase, config.Database) + }) + } +} + +func TestUnmarshalWithPasswordExtraction_AliasPattern(t *testing.T) { + // This test verifies that the alias pattern works correctly + // and prevents infinite recursion + + type RedisConfig struct { + Host string `json:"host"` + Password string `json:"-"` // Not unmarshaled from JSON + Database int `json:"database"` + } + + // Simulate the alias pattern used in actual implementations + jsonData := `{"host":"localhost:6379","auth_token":"secret123","database":1}` + + var config RedisConfig + type Alias RedisConfig + alias := (*Alias)(&config) + + // This should not cause infinite recursion + password, err := UnmarshalWithPasswordExtraction([]byte(jsonData), alias, "TEST_PASSWORD") + + require.NoError(t, err) + assert.Equal(t, "secret123", password) + assert.Equal(t, "localhost:6379", config.Host) + assert.Equal(t, 1, config.Database) +} + +func TestUnmarshalWithPasswordExtraction_StructWithNestedFields(t *testing.T) { + // Test with a more complex struct to ensure general unmarshaling works + + type ComplexRedisConfig struct { + Host string `json:"host"` + Password string `json:"password"` + Database int `json:"database"` + Options map[string]int `json:"options"` + } + + jsonData := `{ + "host":"redis.example.com:6379", + "auth_token":"complex_token", + "database":5, + "options":{"maxRetries":3,"timeout":30} + }` + + var config ComplexRedisConfig + password, err := UnmarshalWithPasswordExtraction([]byte(jsonData), &config, "") + + require.NoError(t, err) + assert.Equal(t, "complex_token", password) + assert.Equal(t, "redis.example.com:6379", config.Host) + assert.Equal(t, 5, config.Database) + assert.Equal(t, 3, config.Options["maxRetries"]) + assert.Equal(t, 30, config.Options["timeout"]) +} + +func TestUnmarshalWithPasswordExtraction_MalformedJSON(t *testing.T) { + type RedisConfig struct { + Host string `json:"host"` + Database int `json:"database"` + } + + tests := []struct { + name string + jsonData string + }{ + { + name: "incomplete JSON", + jsonData: `{"host":"localhost:6379"`, + }, + { + name: "invalid syntax", + jsonData: `{host:localhost}`, + }, + { + name: "trailing comma", + jsonData: `{"host":"localhost:6379",}`, + }, + { + name: "empty string", + jsonData: ``, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var config RedisConfig + _, err := UnmarshalWithPasswordExtraction([]byte(tt.jsonData), &config, "TEST_PASSWORD") + assert.Error(t, err, "Expected error for malformed JSON") + + // Verify it's a JSON unmarshal error + var jsonErr *json.SyntaxError + var jsonTypeErr *json.UnmarshalTypeError + isJSONError := assert.ErrorAs(t, err, &jsonErr) || assert.ErrorAs(t, err, &jsonTypeErr) || err.Error() == "unexpected end of JSON input" + assert.True(t, isJSONError, "Error should be a JSON unmarshal error") + }) + } +} diff --git a/plugins/cmabcache/services/redis_cache.go b/plugins/cmabcache/services/redis_cache.go index 6b70ef21..5a41c7d2 100644 --- a/plugins/cmabcache/services/redis_cache.go +++ b/plugins/cmabcache/services/redis_cache.go @@ -22,6 +22,7 @@ import ( "encoding/json" "github.com/go-redis/redis/v8" + "github.com/optimizely/agent/pkg/utils/redisauth" "github.com/optimizely/agent/plugins/cmabcache" "github.com/optimizely/agent/plugins/utils" "github.com/optimizely/go-sdk/v2/pkg/cache" @@ -39,6 +40,24 @@ type RedisCache struct { Timeout utils.Duration `json:"timeout"` } +// UnmarshalJSON implements custom JSON unmarshaling with flexible password field names +// Supports: auth_token, redis_secret, password (in order of preference) +// Fallback: REDIS_CMAB_PASSWORD environment variable +func (r *RedisCache) UnmarshalJSON(data []byte) error { + // Use an alias type to avoid infinite recursion + type Alias RedisCache + alias := (*Alias)(r) + + // Use shared unmarshal logic with password extraction + password, err := redisauth.UnmarshalWithPasswordExtraction(data, alias, "REDIS_CMAB_PASSWORD") + if err != nil { + return err + } + + r.Password = password + return nil +} + // Lookup is used to retrieve cached CMAB decisions func (r *RedisCache) Lookup(key string) interface{} { // This is required in both lookup and save since an old redis instance can also be used diff --git a/plugins/cmabcache/services/redis_cache_test.go b/plugins/cmabcache/services/redis_cache_test.go index b851cb65..621a9f88 100644 --- a/plugins/cmabcache/services/redis_cache_test.go +++ b/plugins/cmabcache/services/redis_cache_test.go @@ -18,6 +18,7 @@ package services import ( + "encoding/json" "testing" "time" @@ -97,6 +98,69 @@ func (r *RedisCacheTestSuite) TestClientConfiguration() { r.Equal(1, r.cache.Client.Options().DB) } +func TestRedisCache_UnmarshalJSON(t *testing.T) { + tests := []struct { + name string + json string + wantPassword string + wantErr bool + }{ + { + name: "auth_token has priority", + json: `{"host":"localhost:6379","auth_token":"token123","password":"pass456","database":0}`, + wantPassword: "token123", + wantErr: false, + }, + { + name: "redis_secret when auth_token missing", + json: `{"host":"localhost:6379","redis_secret":"secret789","password":"pass456","database":0}`, + wantPassword: "secret789", + wantErr: false, + }, + { + name: "password when others missing", + json: `{"host":"localhost:6379","password":"pass456","database":0}`, + wantPassword: "pass456", + wantErr: false, + }, + { + name: "empty when no password fields", + json: `{"host":"localhost:6379","database":0}`, + wantPassword: "", + wantErr: false, + }, + { + name: "invalid json", + json: `{invalid}`, + wantPassword: "", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var cache RedisCache + err := json.Unmarshal([]byte(tt.json), &cache) + + if tt.wantErr { + if err == nil { + t.Errorf("UnmarshalJSON() error = nil, wantErr %v", tt.wantErr) + } + return + } + + if err != nil { + t.Errorf("UnmarshalJSON() unexpected error = %v", err) + return + } + + if cache.Password != tt.wantPassword { + t.Errorf("UnmarshalJSON() Password = %v, want %v", cache.Password, tt.wantPassword) + } + }) + } +} + func TestRedisCacheTestSuite(t *testing.T) { suite.Run(t, new(RedisCacheTestSuite)) } diff --git a/plugins/odpcache/services/redis_cache.go b/plugins/odpcache/services/redis_cache.go index c52f847c..3df642a2 100644 --- a/plugins/odpcache/services/redis_cache.go +++ b/plugins/odpcache/services/redis_cache.go @@ -22,6 +22,7 @@ import ( "encoding/json" "github.com/go-redis/redis/v8" + "github.com/optimizely/agent/pkg/utils/redisauth" "github.com/optimizely/agent/plugins/odpcache" "github.com/optimizely/agent/plugins/utils" "github.com/optimizely/go-sdk/v2/pkg/cache" @@ -39,6 +40,24 @@ type RedisCache struct { Timeout utils.Duration `json:"timeout"` } +// UnmarshalJSON implements custom JSON unmarshaling with flexible password field names +// Supports: auth_token, redis_secret, password (in order of preference) +// Fallback: REDIS_ODP_PASSWORD environment variable +func (r *RedisCache) UnmarshalJSON(data []byte) error { + // Use an alias type to avoid infinite recursion + type Alias RedisCache + alias := (*Alias)(r) + + // Use shared unmarshal logic with password extraction + password, err := redisauth.UnmarshalWithPasswordExtraction(data, alias, "REDIS_ODP_PASSWORD") + if err != nil { + return err + } + + r.Password = password + return nil +} + // Lookup is used to retrieve segments func (r *RedisCache) Lookup(key string) (segments interface{}) { // This is required in both lookup and save since an old redis instance can also be used diff --git a/plugins/odpcache/services/redis_cache_test.go b/plugins/odpcache/services/redis_cache_test.go index e54b0fdc..61bf3427 100644 --- a/plugins/odpcache/services/redis_cache_test.go +++ b/plugins/odpcache/services/redis_cache_test.go @@ -63,3 +63,57 @@ func (r *RedisCacheTestSuite) TestLookupNotSavedKey() { func TestRedisCacheTestSuite(t *testing.T) { suite.Run(t, new(RedisCacheTestSuite)) } + +func TestRedisCache_UnmarshalJSON(t *testing.T) { + tests := []struct { + name string + json string + wantPassword string + wantErr bool + }{ + { + name: "auth_token has priority", + json: `{"host":"localhost:6379","auth_token":"token123","password":"pass456","database":0}`, + wantPassword: "token123", + wantErr: false, + }, + { + name: "redis_secret when auth_token missing", + json: `{"host":"localhost:6379","redis_secret":"secret789","password":"pass456","database":0}`, + wantPassword: "secret789", + wantErr: false, + }, + { + name: "password when others missing", + json: `{"host":"localhost:6379","password":"pass456","database":0}`, + wantPassword: "pass456", + wantErr: false, + }, + { + name: "empty when no password fields", + json: `{"host":"localhost:6379","database":0}`, + wantPassword: "", + wantErr: false, + }, + { + name: "invalid json", + json: `{invalid}`, + wantPassword: "", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var cache RedisCache + err := cache.UnmarshalJSON([]byte(tt.json)) + if (err != nil) != tt.wantErr { + t.Errorf("UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !tt.wantErr && cache.Password != tt.wantPassword { + t.Errorf("UnmarshalJSON() Password = %v, want %v", cache.Password, tt.wantPassword) + } + }) + } +} diff --git a/plugins/userprofileservice/services/redis_ups.go b/plugins/userprofileservice/services/redis_ups.go index d57ab2dd..c5dabf9e 100644 --- a/plugins/userprofileservice/services/redis_ups.go +++ b/plugins/userprofileservice/services/redis_ups.go @@ -23,6 +23,7 @@ import ( "time" "github.com/go-redis/redis/v8" + "github.com/optimizely/agent/pkg/utils/redisauth" "github.com/optimizely/agent/plugins/userprofileservice" "github.com/optimizely/go-sdk/v2/pkg/decision" "github.com/rs/zerolog/log" @@ -39,6 +40,24 @@ type RedisUserProfileService struct { Database int `json:"database"` } +// UnmarshalJSON implements custom JSON unmarshaling with flexible password field names +// Supports: auth_token, redis_secret, password (in order of preference) +// Fallback: REDIS_UPS_PASSWORD environment variable +func (u *RedisUserProfileService) UnmarshalJSON(data []byte) error { + // Use an alias type to avoid infinite recursion + type Alias RedisUserProfileService + alias := (*Alias)(u) + + // Use shared unmarshal logic with password extraction + password, err := redisauth.UnmarshalWithPasswordExtraction(data, alias, "REDIS_UPS_PASSWORD") + if err != nil { + return err + } + + u.Password = password + return nil +} + // Lookup is used to retrieve past bucketing decisions for users func (u *RedisUserProfileService) Lookup(userID string) (profile decision.UserProfile) { profile = decision.UserProfile{ diff --git a/plugins/userprofileservice/services/redis_ups_test.go b/plugins/userprofileservice/services/redis_ups_test.go index 3e212f3b..6fac69e0 100644 --- a/plugins/userprofileservice/services/redis_ups_test.go +++ b/plugins/userprofileservice/services/redis_ups_test.go @@ -75,3 +75,57 @@ func (r *RedisUPSTestSuite) TestLookupNotSavedProfileID() { func TestRedisUPSTestSuite(t *testing.T) { suite.Run(t, new(RedisUPSTestSuite)) } + +func TestRedisUserProfileService_UnmarshalJSON(t *testing.T) { + tests := []struct { + name string + json string + wantPassword string + wantErr bool + }{ + { + name: "auth_token has priority", + json: `{"host":"localhost:6379","auth_token":"token123","password":"pass456","database":0}`, + wantPassword: "token123", + wantErr: false, + }, + { + name: "redis_secret when auth_token missing", + json: `{"host":"localhost:6379","redis_secret":"secret789","password":"pass456","database":0}`, + wantPassword: "secret789", + wantErr: false, + }, + { + name: "password when others missing", + json: `{"host":"localhost:6379","password":"pass456","database":0}`, + wantPassword: "pass456", + wantErr: false, + }, + { + name: "empty when no password fields", + json: `{"host":"localhost:6379","database":0}`, + wantPassword: "", + wantErr: false, + }, + { + name: "invalid json", + json: `{invalid}`, + wantPassword: "", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var ups RedisUserProfileService + err := ups.UnmarshalJSON([]byte(tt.json)) + if (err != nil) != tt.wantErr { + t.Errorf("UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !tt.wantErr && ups.Password != tt.wantPassword { + t.Errorf("UnmarshalJSON() Password = %v, want %v", ups.Password, tt.wantPassword) + } + }) + } +}