From 9eae61bfcbd185c3e1731acdd1dde94604896347 Mon Sep 17 00:00:00 2001 From: Yilin Jing Date: Fri, 27 Feb 2026 17:12:20 +0800 Subject: [PATCH 1/2] feat(#14): make crawl page depth configurable via env vars Add CRAWL_DEPTH_NEWEST / CRAWL_DEPTH_MAGIC / CRAWL_DEPTH_ENDDATE env vars so page depth can be tuned at runtime without a code change. Defaults: newest=10, magic=5, end_date=3. --- backend/internal/service/cron.go | 37 +++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/backend/internal/service/cron.go b/backend/internal/service/cron.go index d113dcf..9492558 100644 --- a/backend/internal/service/cron.go +++ b/backend/internal/service/cron.go @@ -3,6 +3,8 @@ package service import ( "fmt" "log" + "os" + "strconv" "time" "github.com/kickwatch/backend/internal/model" @@ -11,6 +13,16 @@ import ( "gorm.io/gorm/clause" ) +// envInt reads an integer from an env var, returning defaultVal if it is unset, non-numeric, or not positive (zero and negative values fall back to the default). +func envInt(key string, defaultVal int) int { + if v := os.Getenv(key); v != "" { + if n, err := strconv.Atoi(v); err == nil && n > 0 { + return n + } + } + return defaultVal +} + type CronService struct { db *gorm.DB scrapingService *KickstarterScrapingService @@ -57,14 +69,23 @@ func (s *CronService) syncCategories() { } // crawlSorts defines the sort strategies used in each nightly crawl pass. -// "newest" catches new launches; "magic" catches trending; "end_date" catches expiring. 
-var crawlSorts = []struct { +// Default page depths can be overridden at runtime via env vars: +// +// CRAWL_DEPTH_NEWEST (default 10) +// CRAWL_DEPTH_MAGIC (default 5) +// CRAWL_DEPTH_ENDDATE (default 3) +func buildCrawlSorts() []struct { sort string - pageDepth int // pages per category for this sort pass -}{ - {"newest", 10}, // primary: new launches, full depth - {"magic", 5}, // trending/recommended - {"end_date", 3}, // ending soon + pageDepth int +} { + return []struct { + sort string + pageDepth int + }{ + {"newest", envInt("CRAWL_DEPTH_NEWEST", 10)}, + {"magic", envInt("CRAWL_DEPTH_MAGIC", 5)}, + {"end_date", envInt("CRAWL_DEPTH_ENDDATE", 3)}, + } } func (s *CronService) RunCrawlNow() error { @@ -73,7 +94,7 @@ func (s *CronService) RunCrawlNow() error { upserted := 0 var allCampaigns []model.Campaign - for _, sortCfg := range crawlSorts { + for _, sortCfg := range buildCrawlSorts() { for _, cat := range crawlCategories { depth := cat.PageDepth if sortCfg.pageDepth < depth { From 151e1234d54b802d8254f3c9fd24479082fffe64 Mon Sep 17 00:00:00 2001 From: Yilin Jing Date: Fri, 27 Feb 2026 18:21:30 +0800 Subject: [PATCH 2/2] =?UTF-8?q?fix(#21):=20use=20sortCfg.pageDepth=20direc?= =?UTF-8?q?tly=20=E2=80=94=20env=20vars=20can=20now=20increase=20depth?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/internal/service/cron.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/backend/internal/service/cron.go b/backend/internal/service/cron.go index 9492558..db786d6 100644 --- a/backend/internal/service/cron.go +++ b/backend/internal/service/cron.go @@ -96,10 +96,9 @@ func (s *CronService) RunCrawlNow() error { for _, sortCfg := range buildCrawlSorts() { for _, cat := range crawlCategories { - depth := cat.PageDepth - if sortCfg.pageDepth < depth { - depth = sortCfg.pageDepth - } + // sortCfg.pageDepth is env-configurable (can raise or lower). 
+ // cat.PageDepth is no longer consulted here; with no env var set, the defaults come from buildCrawlSorts. + depth := sortCfg.pageDepth for page := 1; page <= depth; page++ { campaigns, err := s.scrapingService.DiscoverCampaigns(cat.ID, sortCfg.sort, page) if err != nil {