From add25b541c86479f1f3c72fa3a1ffc851c024244 Mon Sep 17 00:00:00 2001 From: Yilin Jing Date: Sat, 28 Feb 2026 12:13:11 +0800 Subject: [PATCH 1/2] fix(#40): resolve cursor pagination and index coverage issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Codex security review findings from PR #38 merge: ## 🔴 High Priority - Cursor Pagination Compatibility **Problem**: DB uses base64 offsets, ScrapingBee uses page:N format - Cursor format mismatch caused pagination resets mid-pagination - Empty DB pages fell back to ScrapingBee, restarting at page 1 - sort=hot missing from sortMap, degraded to MAGIC on fallback - Legitimate empty results wasted ScrapingBee credits **Fixes**: 1. Added "hot" to sortMap → prevents sort degradation on fallback 2. Prevent cursor cycling: once paginating (cursor != ""), stay on DB - Only fall back to ScrapingBee on first load (cursor == "") - Avoids base64 ↔ page:N format conflicts 3. Return empty array instead of falling back when paginating past end ## 🟡 Medium Priority - Comprehensive Index Coverage **Problem**: Indexes didn't match actual query patterns - Missing deadline predicate in indexes → scans expired rows - Missing category+newest, category+ending indexes - Missing percent_funded tie-breaker in category+trending **Fixes**: Added 6 comprehensive composite indexes ```sql -- All include (state, deadline, ...) to filter expired rows early idx_campaigns_trending (state, deadline, velocity_24h DESC, percent_funded DESC) idx_campaigns_newest (state, deadline, first_seen_at DESC) idx_campaigns_ending (state, deadline ASC) idx_campaigns_category_trending (state, deadline, category_id, velocity_24h DESC, percent_funded DESC) idx_campaigns_category_newest (state, deadline, category_id, first_seen_at DESC) idx_campaigns_category_ending (state, deadline, category_id) ``` **Impact**: - Prevents table scans of expired rows as data grows - Covers all query patterns (4 sorts × 2 filtering modes) - Includes all ORDER BY columns for optimal performance ## References - Codex review: commit 1173168 - Issue: #40 Co-Authored-By: Claude Sonnet 4.5 --- backend/internal/db/db.go | 21 +++++++++++--- backend/internal/handler/campaigns.go | 41 ++++++++++++++++----------- 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/backend/internal/db/db.go b/backend/internal/db/db.go index 8584d4e..6166df4 100644 --- a/backend/internal/db/db.go +++ b/backend/internal/db/db.go @@ -155,19 +155,32 @@ func Init(cfg *config.Config) error { } // Create composite indexes for optimal query performance - // These support the common query patterns: WHERE state='live' ORDER BY ... + // All queries filter WHERE state='live' AND deadline >= NOW(), so deadline comes first + // to exclude expired rows early and avoid table scans as expired data grows if err := DB.Exec(` + -- Trending: WHERE state='live' AND deadline >= NOW() ORDER BY velocity_24h DESC, percent_funded DESC CREATE INDEX IF NOT EXISTS idx_campaigns_trending - ON campaigns(state, velocity_24h DESC, percent_funded DESC); + ON campaigns(state, deadline, velocity_24h DESC, percent_funded DESC); + -- Newest: WHERE state='live' AND deadline >= NOW() ORDER BY first_seen_at DESC CREATE INDEX IF NOT EXISTS idx_campaigns_newest - ON campaigns(state, first_seen_at DESC); + ON campaigns(state, deadline, first_seen_at DESC); + -- Ending: WHERE state='live' AND deadline >= NOW() ORDER BY deadline ASC CREATE INDEX IF NOT EXISTS idx_campaigns_ending ON campaigns(state, deadline ASC); + -- Category+Trending: WHERE state='live' AND deadline >= NOW() AND category_id=? ORDER BY velocity_24h DESC, percent_funded DESC CREATE INDEX IF NOT EXISTS idx_campaigns_category_trending - ON campaigns(state, category_id, velocity_24h DESC); + ON campaigns(state, deadline, category_id, velocity_24h DESC, percent_funded DESC); + + -- Category+Newest: WHERE state='live' AND deadline >= NOW() AND category_id=? ORDER BY first_seen_at DESC + CREATE INDEX IF NOT EXISTS idx_campaigns_category_newest + ON campaigns(state, deadline, category_id, first_seen_at DESC); + + -- Category+Ending: WHERE state='live' AND deadline >= NOW() AND category_id=? ORDER BY deadline ASC + CREATE INDEX IF NOT EXISTS idx_campaigns_category_ending + ON campaigns(state, deadline, category_id); `).Error; err != nil { return fmt.Errorf("create composite indexes: %w", err) } diff --git a/backend/internal/handler/campaigns.go b/backend/internal/handler/campaigns.go index a6a367f..67b8068 100644 --- a/backend/internal/handler/campaigns.go +++ b/backend/internal/handler/campaigns.go @@ -14,6 +14,7 @@ import ( var sortMap = map[string]string{ "trending": "MAGIC", + "hot": "MAGIC", // Fallback uses MAGIC for hot sort (close approximation) "newest": "NEWEST", "ending": "END_DATE", } @@ -64,28 +65,34 @@ func ListCampaigns(client *service.KickstarterScrapingService) gin.HandlerFunc { q = q.Where("category_id = ?", categoryID) } - // Only return DB results if we have data; otherwise fall through to ScrapingBee - if err := q.Find(&campaigns).Error; err == nil && len(campaigns) > 0 { - hasMore := len(campaigns) > limit - if hasMore { - campaigns = campaigns[:limit] + // Return DB results if we have data + // Note: Once DB is seeded, always use DB to avoid cursor format conflicts + // (DB uses base64 offsets, ScrapingBee uses page:N format) + if err := q.Find(&campaigns).Error; err == nil { + // Even if empty, return DB results if we're paginating (cursor != "") + // This prevents cursor format cycling between DB and ScrapingBee + if len(campaigns) > 0 || cursor != "" { + hasMore := len(campaigns) > limit + if hasMore { + campaigns = campaigns[:limit] + } + + var nextCursor interface{} + if hasMore { + nextOffset := offset + limit + nextCursor = base64.StdEncoding.EncodeToString([]byte(strconv.Itoa(nextOffset))) + } + + // Don't include total for DB queries (we don't track global count) + c.JSON(http.StatusOK, gin.H{"campaigns": campaigns, "next_cursor": nextCursor}) + return } - - var nextCursor interface{} - if hasMore { - nextOffset := offset + limit - nextCursor = base64.StdEncoding.EncodeToString([]byte(strconv.Itoa(nextOffset))) - } - - // Don't include total for DB queries (we don't track global count) - c.JSON(http.StatusOK, gin.H{"campaigns": campaigns, "next_cursor": nextCursor}) - return } - // If DB query fails or returns empty, fall through to ScrapingBee fallback + // Only fall through to ScrapingBee on first load (cursor == "") and DB empty/failed } // ScrapingBee fallback only for: - // - DB unavailable or empty (cold start, failed query) + // - First load (cursor == "") when DB is unavailable or empty (cold start) // - SearchCampaigns endpoint (user search with query text) gqlSort, ok := sortMap[sort] if !ok { From 53e86c657bbe10bfbaca6e985f418c46fa0e02d5 Mon Sep 17 00:00:00 2001 From: Yilin Jing Date: Sat, 28 Feb 2026 12:22:00 +0800 Subject: [PATCH 2/2] fix: resolve cursor format detection and index migration bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address both Codex review findings from PR #41: ## 🔴 High Priority - Cursor Format Detection **Problem**: Mixed cursor formats still caused pagination resets - ScrapingBee returns cursor like "page:2" - Code tried to decode "page:2" as base64 → failed → offset=0 - Resulted in returning first page again (duplicate results) **Fix**: Detect cursor source and maintain format consistency ```go // Detect ScrapingBee cursor format (starts with "page:") if cursor != "" && strings.HasPrefix(cursor, "page:") { goto useScrapingBee // Continue with ScrapingBee pagination } // Otherwise decode as base64 DB offset if decoded, err := base64.StdEncoding.DecodeString(cursor); err == nil { offset, _ = strconv.Atoi(string(decoded)) } else { log.Printf("invalid cursor format %q, treating as offset 0", cursor) } ``` **Impact**: - No more pagination resets when mixing DB and ScrapingBee - Cursor format stays consistent throughout pagination session - Graceful fallback with logging for unexpected cursor formats ## 🟡 Medium Priority - Index Migration **Problem**: Upgraded databases kept old index definitions - PR #38 created idx_campaigns_trending, etc. without deadline predicate - CREATE INDEX IF NOT EXISTS kept old definitions - Only fresh databases got improved indexes **Fix**: Drop and recreate indexes on every migration ```sql -- Drop old indexes from PR #38 (missing deadline predicate) DROP INDEX IF EXISTS idx_campaigns_trending; DROP INDEX IF EXISTS idx_campaigns_newest; DROP INDEX IF EXISTS idx_campaigns_ending; DROP INDEX IF EXISTS idx_campaigns_category_trending; -- Create improved indexes with deadline predicate CREATE INDEX idx_campaigns_trending ON campaigns(state, deadline, velocity_24h DESC, percent_funded DESC); -- ... etc ``` **Impact**: - All databases (fresh and upgraded) get improved indexes - Proper filtering of expired rows - Optimal query performance for all installations ## Testing - [x] Backend tests pass - [x] Go compilation succeeds - [x] Added log import for cursor format warnings - [x] Verified goto label syntax Co-Authored-By: Claude Sonnet 4.5 --- backend/internal/db/db.go | 17 +++++++++++++---- backend/internal/handler/campaigns.go | 25 ++++++++++++++++++++----- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/backend/internal/db/db.go b/backend/internal/db/db.go index 6166df4..9d9deea 100644 --- a/backend/internal/db/db.go +++ b/backend/internal/db/db.go @@ -157,21 +157,30 @@ func Init(cfg *config.Config) error { // Create composite indexes for optimal query performance // All queries filter WHERE state='live' AND deadline >= NOW(), so deadline comes first // to exclude expired rows early and avoid table scans as expired data grows + // + // Note: PR #38 created indexes without deadline predicate. We drop and recreate + // to ensure upgraded databases get the improved definitions. if err := DB.Exec(` + -- Drop old indexes from PR #38 (missing deadline predicate) + DROP INDEX IF EXISTS idx_campaigns_trending; + DROP INDEX IF EXISTS idx_campaigns_newest; + DROP INDEX IF EXISTS idx_campaigns_ending; + DROP INDEX IF EXISTS idx_campaigns_category_trending; + -- Trending: WHERE state='live' AND deadline >= NOW() ORDER BY velocity_24h DESC, percent_funded DESC - CREATE INDEX IF NOT EXISTS idx_campaigns_trending + CREATE INDEX idx_campaigns_trending ON campaigns(state, deadline, velocity_24h DESC, percent_funded DESC); -- Newest: WHERE state='live' AND deadline >= NOW() ORDER BY first_seen_at DESC - CREATE INDEX IF NOT EXISTS idx_campaigns_newest + CREATE INDEX idx_campaigns_newest ON campaigns(state, deadline, first_seen_at DESC); -- Ending: WHERE state='live' AND deadline >= NOW() ORDER BY deadline ASC - CREATE INDEX IF NOT EXISTS idx_campaigns_ending + CREATE INDEX idx_campaigns_ending ON campaigns(state, deadline ASC); -- Category+Trending: WHERE state='live' AND deadline >= NOW() AND category_id=? ORDER BY velocity_24h DESC, percent_funded DESC - CREATE INDEX IF NOT EXISTS idx_campaigns_category_trending + CREATE INDEX idx_campaigns_category_trending ON campaigns(state, deadline, category_id, velocity_24h DESC, percent_funded DESC); -- Category+Newest: WHERE state='live' AND deadline >= NOW() AND category_id=? ORDER BY first_seen_at DESC diff --git a/backend/internal/handler/campaigns.go b/backend/internal/handler/campaigns.go index 67b8068..93f559b 100644 --- a/backend/internal/handler/campaigns.go +++ b/backend/internal/handler/campaigns.go @@ -2,8 +2,10 @@ package handler import ( "encoding/base64" + "log" "net/http" "strconv" + "strings" "time" "github.com/gin-gonic/gin" @@ -36,10 +38,22 @@ func ListCampaigns(client *service.KickstarterScrapingService) gin.HandlerFunc { // - hot: velocity_24h (our metric) // - ending: deadline (exact from Kickstarter) if db.IsEnabled() { + // Detect cursor source: ScrapingBee uses "page:N", DB uses base64 offsets + // If cursor is from ScrapingBee, fall through to ScrapingBee to maintain format + if cursor != "" && strings.HasPrefix(cursor, "page:") { + // ScrapingBee cursor format detected - fall through to ScrapingBee path + // to avoid format mismatch (cannot mix base64 offsets with page numbers) + goto useScrapingBee + } + offset := 0 if cursor != "" { if decoded, err := base64.StdEncoding.DecodeString(cursor); err == nil { offset, _ = strconv.Atoi(string(decoded)) + } else { + // Invalid cursor format - treat as offset 0 but log warning + // This shouldn't happen if client respects cursor format + log.Printf("ListCampaigns: invalid cursor format %q, treating as offset 0", cursor) } } @@ -66,11 +80,10 @@ func ListCampaigns(client *service.KickstarterScrapingService) gin.HandlerFunc { } // Return DB results if we have data - // Note: Once DB is seeded, always use DB to avoid cursor format conflicts - // (DB uses base64 offsets, ScrapingBee uses page:N format) + // Note: Once using DB cursors, always use DB to maintain cursor format consistency if err := q.Find(&campaigns).Error; err == nil { - // Even if empty, return DB results if we're paginating (cursor != "") - // This prevents cursor format cycling between DB and ScrapingBee + // Return DB results if we have data, OR if we're paginating with a DB cursor + // (cursor != "" and not a ScrapingBee cursor means it's a DB cursor) if len(campaigns) > 0 || cursor != "" { hasMore := len(campaigns) > limit if hasMore { @@ -91,8 +104,10 @@ func ListCampaigns(client *service.KickstarterScrapingService) gin.HandlerFunc { // Only fall through to ScrapingBee on first load (cursor == "") and DB empty/failed } - // ScrapingBee fallback only for: + useScrapingBee: + // ScrapingBee fallback for: // - First load (cursor == "") when DB is unavailable or empty (cold start) + // - ScrapingBee pagination (cursor starts with "page:") // - SearchCampaigns endpoint (user search with query text) gqlSort, ok := sortMap[sort] if !ok {