From 1ee3a237a1c033286b3d6c7e9d106ca181e7ca80 Mon Sep 17 00:00:00 2001 From: Nadu_Dev Date: Thu, 30 Apr 2026 14:30:56 +0530 Subject: [PATCH 1/7] feat: add regex dependency and implement progress tracking in YtdlpManager --- Cargo.lock | 1 + Cargo.toml | 1 + src/controllers/api/v1/ytdlp_controller.rs | 36 +++- src/services/ytdlp/mod.rs | 198 ++++++++++----------- 4 files changed, 128 insertions(+), 108 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a7af335..c8de799 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -985,6 +985,7 @@ dependencies = [ "dotenvy", "governor", "http-body-util", + "regex", "reqwest", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index a49b32e..c463f0f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" thiserror = "2.0" tokio = { version = "1", features = ["full"] } +regex = "1.11" tokio-stream = "0.1" tower = "0.5" tower-http = { version = "0.6", features = ["cors", "trace", "compression-full", "fs"] } diff --git a/src/controllers/api/v1/ytdlp_controller.rs b/src/controllers/api/v1/ytdlp_controller.rs index 2f4ef42..cfd960a 100644 --- a/src/controllers/api/v1/ytdlp_controller.rs +++ b/src/controllers/api/v1/ytdlp_controller.rs @@ -9,7 +9,7 @@ use axum::{ }, }; use std::{borrow::Cow, convert::Infallible, path::PathBuf, time::Duration}; -use tokio::{sync::mpsc, time::sleep}; +use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; use tower::ServiceExt; use tower_http::services::ServeFile; @@ -75,6 +75,7 @@ pub async fn list_download_jobs( } /// Streams progress of a download job via SSE. +#[allow(clippy::too_many_lines)] pub async fn stream_download_progress( State(state): State, Path(id): Path, @@ -107,6 +108,7 @@ pub async fn stream_download_progress( let manager = state.ytdlp_manager; let (tx, rx) = mpsc::channel::>(16); + let mut broadcast_rx = manager.subscribe(); let stream_job_id = id; let stream_path = request_path; let stream_client_ip = client_ip; @@ -114,7 +116,35 @@ pub async fn stream_download_progress( tokio::spawn(async move { let mut last_snapshot = String::new(); - loop { + // Send initial snapshot + if let Some(job) = manager.get_job(&stream_job_id) { + let job_resp = YtdlpJobResponse::from(job); + let snapshot = serde_json::to_string(&job_resp).unwrap_or_default(); + last_snapshot.clone_from(&snapshot); + if tx + .send(Ok(Event::default().event("progress").data(snapshot))) + .await + .is_err() + { + return; + } + + if matches!( + job_resp.status, + YtdlpJobStatus::Finished | YtdlpJobStatus::Failed + ) { + let _ = tx + .send(Ok(Event::default().event("done").data("done"))) + .await; + return; + } + } + + while let Ok(updated_id) = broadcast_rx.recv().await { + if updated_id != stream_job_id { + continue; + } + if let Some(job) = manager.get_job(&stream_job_id) { let job_resp = YtdlpJobResponse::from(job); let snapshot = serde_json::to_string(&job_resp).unwrap_or_default(); @@ -155,8 +185,6 @@ pub async fn stream_download_progress( ); break; } - - sleep(Duration::from_millis(1500)).await; } info!( diff --git a/src/services/ytdlp/mod.rs b/src/services/ytdlp/mod.rs index 5b01687..bddeecc 100644 --- a/src/services/ytdlp/mod.rs +++ b/src/services/ytdlp/mod.rs @@ -7,20 +7,26 @@ use crate::{ }, }; use dashmap::DashMap; +use regex::Regex; use std::{ path::{Component, Path, PathBuf}, process::Stdio, - sync::Arc, + sync::{Arc, OnceLock}, time::SystemTime, }; use tokio::{ fs, io::{AsyncBufReadExt, AsyncRead, BufReader}, process::Command, - sync::Semaphore, + sync::{Semaphore, broadcast}, }; use tracing::{debug, error, info}; +static PROGRESS_RE: OnceLock = OnceLock::new(); +static PEAK_RE: OnceLock = OnceLock::new(); +static QUEUED_RE: OnceLock = OnceLock::new(); +static FIRST_PCT_RE: OnceLock = OnceLock::new(); + const MAX_CAPTURED_OUTPUT_BYTES: usize = 8_000; const YTDLP_TIMEOUT_SECS: u64 = 7_200; const ARIA2_DOWNLOADER: &str = "aria2c"; @@ -43,17 +49,20 @@ pub struct YtdlpManager { jobs: Arc>, semaphore: Arc, job_counter: Arc, + progress_tx: broadcast::Sender, } impl YtdlpManager { /// Creates a new instance of `YtdlpManager` and starts the background cleanup task. #[must_use] pub fn new(cfg: Arc) -> Self { + let (progress_tx, _) = broadcast::channel(1024); let manager = Self { semaphore: Arc::new(Semaphore::new(cfg.max_concurrent_downloads)), cfg, jobs: Arc::new(DashMap::new()), job_counter: Arc::new(std::sync::atomic::AtomicU64::new(1)), + progress_tx, }; let jobs_weak = Arc::downgrade(&manager.jobs); @@ -164,6 +173,11 @@ impl YtdlpManager { .collect() } + /// Subscribes to the job progress broadcast channel. + pub fn subscribe(&self) -> broadcast::Receiver { + self.progress_tx.subscribe() + } + fn next_id(&self) -> String { let ts = now_unix(); let counter = self @@ -212,8 +226,9 @@ impl YtdlpManager { self.mark_job_started(&id); - if let Err(err) = fs::create_dir_all(&output_dir).await { - self.mark_job_failed(&id, format!("failed to create output directory: {err}")); + let temp_dir = PathBuf::from(&output_dir).join("tmp").join(&id); + if let Err(err) = fs::create_dir_all(&temp_dir).await { + self.mark_job_failed(&id, format!("failed to create temp directory: {err}")); return; } @@ -243,7 +258,7 @@ impl YtdlpManager { } cmd.arg("-P") - .arg(&output_dir) + .arg(&temp_dir) .arg("-o") .arg(format!("{id}.%(ext)s")) .arg(payload.url.clone()); @@ -290,6 +305,7 @@ impl YtdlpManager { Err(err) => { self.mark_job_failed(&id, format!("failed to spawn yt-dlp: {err}")); error!("failed ytdlp job id={id}: {err}"); + let _ = fs::remove_dir_all(&temp_dir).await; return; } }; @@ -316,45 +332,48 @@ impl YtdlpManager { match wait_result { Ok(Ok(status)) if status.success() => { - let files = collect_downloaded_files(&output_dir, &id).await; - self.mark_job_finished(&id, files); - info!("finished ytdlp job id={id}"); + let temp_dir_str = temp_dir.to_string_lossy(); + let files = collect_downloaded_files(&temp_dir_str, &id).await; + + if let Err(err) = fs::create_dir_all(&output_dir).await { + self.mark_job_failed( + &id, + format!("failed to create final output directory: {err}"), + ); + } else { + let mut moved_files = Vec::with_capacity(files.len()); + for file in files { + let from = temp_dir.join(&file); + let to = PathBuf::from(&output_dir).join(&file); + if fs::rename(&from, &to).await.is_ok() { + moved_files.push(file); + } + } + self.mark_job_finished(&id, moved_files); + info!("finished ytdlp job id={id}"); + } + let _ = fs::remove_dir_all(&temp_dir).await; } Ok(Ok(status)) => { let error_message = truncate_message(&combined_output, 2_000); self.mark_job_failed(&id, format!("yt-dlp failed ({status}): {error_message}")); - let is_base_dir = output_dir == self.cfg.download_dir; - Self::cleanup_failed_files(&output_dir, &id, is_base_dir).await; + Self::cleanup_failed_files(&temp_dir.to_string_lossy(), &id, false).await; } Ok(Err(err)) => { self.mark_job_failed(&id, format!("failed to wait for yt-dlp: {err}")); error!("yt-dlp process error for job id={id}: {err}"); - let is_base_dir = output_dir == self.cfg.download_dir; - Self::cleanup_failed_files(&output_dir, &id, is_base_dir).await; + Self::cleanup_failed_files(&temp_dir.to_string_lossy(), &id, false).await; } Err(_) => { - self.mark_job_failed(&id, "yt-dlp process timed out".to_string()); + self.mark_job_failed(&id, format!("yt-dlp timed out after {YTDLP_TIMEOUT_SECS}s")); error!("job timed out id={id}"); - let is_base_dir = output_dir == self.cfg.download_dir; - Self::cleanup_failed_files(&output_dir, &id, is_base_dir).await; + Self::cleanup_failed_files(&temp_dir.to_string_lossy(), &id, false).await; } } } - async fn cleanup_failed_files(output_dir: &str, id: &str, is_base_dir: bool) { - let id_prefix = format!("{id}."); - if let Ok(mut entries) = fs::read_dir(output_dir).await { - while let Ok(Some(entry)) = entries.next_entry().await { - if let Ok(file_name) = entry.file_name().into_string() - && file_name.starts_with(&id_prefix) - { - let _ = fs::remove_file(entry.path()).await; - } - } - } - if !is_base_dir { - let _ = fs::remove_dir(output_dir).await; - } + async fn cleanup_failed_files(temp_dir: &str, _id: &str, _is_base_dir: bool) { + let _ = fs::remove_dir_all(temp_dir).await; } fn update_job(&self, id: &str, update_fn: F) @@ -363,6 +382,7 @@ impl YtdlpManager { { if let Some(mut job) = self.jobs.get_mut(id) { update_fn(job.value_mut()); + let _ = self.progress_tx.send(id.to_string()); } } @@ -488,20 +508,33 @@ fn is_windows_absolute_path(path: &str) -> bool { bytes.len() >= 3 && bytes[0].is_ascii_alphabetic() && bytes[1] == b':' && bytes[2] == b'/' } +#[allow(clippy::expect_used)] fn parse_aria2_progress(line: &str) -> Option { - if !is_aria2_progress_line(line) { - return None; - } - - let total = line - .strip_prefix("[DL:") - .and_then(|value| value.find(']').map(|idx| value[..idx].trim().to_string())) - .filter(|value| !value.is_empty()) - .or_else(|| extract_aria2_total_from_fragment(line)); + let re = PROGRESS_RE.get_or_init(|| { + Regex::new( + r"\[DL:(?P[^\]]*)\](?:\s+DL:(?P[^\s]*))?(?:\s+ETA:(?P[^\s]*))?", + ) + .expect("invalid progress regex") + }); + + let caps = re.captures(line)?; + + let total = caps + .name("total") + .map(|m| m.as_str().trim().to_string()) + .filter(|s| !s.is_empty()); + + let speed = caps + .name("speed") + .map(|m| m.as_str().to_string()) + .filter(|s| !s.is_empty()); + + let eta = caps + .name("eta") + .map(|m| m.as_str().to_string()) + .filter(|s| !s.is_empty()); let percent = extract_peak_percent(line).or_else(|| extract_first_percent(line)); - let speed = extract_after_marker(line, " DL:"); - let eta = extract_after_marker(line, " ETA:"); if percent.is_none() && total.is_none() && speed.is_none() && eta.is_none() { return None; @@ -515,27 +548,6 @@ fn parse_aria2_progress(line: &str) -> Option { }) } -fn is_aria2_progress_line(line: &str) -> bool { - line.starts_with("[DL:") - || (line.starts_with('[') - && (line.contains(" DL:") - || line.contains(" ETA:") - || line.contains("CN:") - || line.contains("%)"))) -} - -fn extract_aria2_total_from_fragment(line: &str) -> Option { - line.trim_matches(|c| c == '[' || c == ']') - .split_whitespace() - .find_map(|token| { - let progress = token.split_once('(').map_or(token, |(head, _)| head); - progress - .split_once('/') - .map(|(_, total)| total.trim().trim_matches(']').to_string()) - .filter(|value| !value.is_empty()) - }) -} - fn format_aria2_progress_message(line: &str, parsed: &ParsedProgress) -> String { let active = line.matches("[#").count(); let queued = extract_queued_fragments(line); @@ -565,56 +577,34 @@ fn format_aria2_progress_message(line: &str, parsed: &ParsedProgress) -> String parts.join(" | ") } +#[allow(clippy::expect_used)] fn extract_queued_fragments(line: &str) -> Option { - let start = line.find("(+")? + 2; - let tail = &line[start..]; - let end = tail.find(')')?; - tail[..end].parse::().ok() + let re = QUEUED_RE + .get_or_init(|| Regex::new(r"\(\+(?P\d+)\)").expect("invalid queued regex")); + re.captures(line) + .and_then(|caps| caps.name("queued")) + .and_then(|m| m.as_str().parse::().ok()) } +#[allow(clippy::expect_used)] fn extract_peak_percent(line: &str) -> Option { - let mut max_pct: Option = None; - let mut remaining = line; - - while let Some(end_idx) = remaining.find("%)") { - let upto = &remaining[..end_idx]; - if let Some(start_idx) = upto.rfind('(') - && let Ok(value) = upto[(start_idx + 1)..].parse::() - { - max_pct = Some(max_pct.map_or(value, |current| current.max(value))); - } - remaining = &remaining[(end_idx + 3)..]; - } - - max_pct + let re = + PEAK_RE.get_or_init(|| Regex::new(r"\((?P[\d.]+)\%\)").expect("invalid peak regex")); + re.captures_iter(line) + .filter_map(|caps| { + caps.name("pct") + .and_then(|m| m.as_str().parse::().ok()) + }) + .fold(None, |max, val| Some(max.map_or(val, |m| m.max(val)))) } +#[allow(clippy::expect_used)] fn extract_first_percent(line: &str) -> Option { - let idx = line.find('%')?; - let prefix = &line[..idx]; - let number = prefix - .chars() - .rev() - .take_while(|c| c.is_ascii_digit() || *c == '.') - .collect::() - .chars() - .rev() - .collect::(); - if number.is_empty() { - None - } else { - number.parse::().ok() - } -} - -fn extract_after_marker(line: &str, marker: &str) -> Option { - let idx = line.find(marker)?; - let after = &line[(idx + marker.len())..]; - let value = after - .chars() - .take_while(|c| c.is_ascii_alphanumeric() || *c == '.' || *c == '/' || *c == ':') - .collect::(); - if value.is_empty() { None } else { Some(value) } + let re = FIRST_PCT_RE + .get_or_init(|| Regex::new(r"(?P[\d.]+)%").expect("invalid first percent regex")); + re.captures(line) + .and_then(|caps| caps.name("pct")) + .and_then(|m| m.as_str().parse::().ok()) } fn spawn_line_collector( From b3e8245fecf64fd6ed54036c5cdb2a619e57a9dd Mon Sep 17 00:00:00 2001 From: Nadu_Dev Date: Thu, 30 Apr 2026 11:59:36 +0000 Subject: [PATCH 2/7] chore: update code structure for improved readability and maintainability Co-authored-by: Copilot --- README.md | 276 ++++++++++++++++-- .../Themed-Architecture-Diagram-code.md | 45 --- docs/images/Themed-Architecture-Diagram.svg | 1 - 3 files changed, 245 insertions(+), 77 deletions(-) delete mode 100644 docs/images/Themed-Architecture-Diagram-code.md delete mode 100644 docs/images/Themed-Architecture-Diagram.svg diff --git a/README.md b/README.md index 6fad6d7..5af284d 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,239 @@ # Nadzu-API -Personal backend API built with Rust. Focused on concurrency, performance, security, and future-proof design. +Personal backend API built with Rust, focused on concurrency, performance, security, and long-term maintainability. + +## Architecture at a Glance + +
+Core system diagram + +```mermaid +flowchart TB + %% ========================================== + %% BRAND COLORS & STYLING CLASSES + %% ========================================== + classDef extClient fill:#2EA043,color:#ffffff,stroke:#1e6a2c,stroke-width:2px; + classDef proxy fill:#00ADD8,color:#ffffff,stroke:#007a99,stroke-width:2px; + classDef rustApp fill:#111111,color:#DEA584,stroke:#DEA584,stroke-width:2px; + classDef module fill:#1a1a1a,color:#DDDDDD,stroke:#444444,stroke-width:1px,rx:4px; + classDef pg fill:#336791,color:#ffffff,stroke:#234a6a,stroke-width:2px; + classDef redis fill:#DC382D,color:#ffffff,stroke:#9e2820,stroke-width:2px; + classDef extAPI fill:#444444,color:#ffffff,stroke:#222222,stroke-width:2px; + classDef warp fill:#F38020,color:#ffffff,stroke:#b35c00,stroke-width:2px; + + %% ========================================== + %% INGRESS + %% ========================================== + Client["Web Client / API Consumer"]:::extClient + Caddy{"Caddy Reverse Proxy"}:::proxy + + Client ==>|"HTTPS Requests"| Caddy + + %% ========================================== + %% CORE RUST BACKEND + %% ========================================== + subgraph CoreBackend ["Rust Backend Application Core"] + direction TB + + MW["Middleware Layer\n(Auth, CORS, RateLimit, Captcha)"]:::module + Router["Routing Layer\n(/api/v1/*)"]:::module + + subgraph Controllers ["Controllers"] + direction LR + RootCtrl["Root / Health"]:::module + YtCtrl["YTDLP Controller"]:::module + ContribCtrl["Contributions"]:::module + ValidCtrl["Validation"]:::module + end + + subgraph Services ["Business Logic Services"] + direction LR + YtSvc["YT-DLP Service"]:::module + ContribSvc["Contributions Service"]:::module + end + + subgraph DataState ["Data & State Management"] + direction LR + PgConn["Postgres Pool (sqlx)"]:::module + RdConn["Redis Multiplex (bb8)"]:::module + AppState["Shared App State (DashMap)"]:::module + end + + %% Internal Flow + MW --> Router + Router --> RootCtrl & YtCtrl & ContribCtrl & ValidCtrl + + YtCtrl -->|"Calls"| YtSvc + ContribCtrl -->|"Calls"| ContribSvc + + YtSvc --> AppState + ContribSvc --> AppState + AppState --> PgConn & RdConn + end + + %% ========================================== + %% DATA & EXTERNAL WORKERS + %% ========================================== + subgraph DataLayer ["Data Layer"] + PG[(PostgreSQL 15)]:::pg + Redis[(Redis Alpine)]:::redis + end + + subgraph WorkerLayer ["Media Processing Layer"] + Worker["yt-dlp / aria2c Process"]:::rustApp + WARP["WARP SOCKS5 Proxy"]:::warp + end + + subgraph ExternalLayer ["External Services"] + GH["GitHub GraphQL API"]:::extAPI + YT["External Media Hosts"]:::extAPI + end + + %% ========================================== + %% CROSS-BOUNDARY CONNECTIONS + %% ========================================== + Caddy ==>|"Proxies API Traffic"| MW + + PgConn <==>|"sqlx queries"| PG + RdConn <==>|"bb8 multiplexing"| Redis + + YtSvc -.->|"tokio::process::Command"| Worker + Worker ==>|"SOCKS5"| WARP + WARP ==>|"Obfuscated Download"| YT + + ContribSvc ==>|"Anti-Corruption Layer"| GH + + %% ========================================== + %% STYLING FIXES (GitHub Compatible) + %% ========================================== + style CoreBackend fill:none,stroke:#FF5252,stroke-width:2px + style Controllers fill:none,stroke:none + style Services fill:none,stroke:none + style DataState fill:none,stroke:none + style DataLayer fill:none,stroke:none + style WorkerLayer fill:none,stroke:none + style ExternalLayer fill:none,stroke:none + +``` + +
+ +
+Infrastructure diagram + +```mermaid +flowchart TB + %% BRAND COLORS & STYLING CLASSES + classDef extAdmin fill:#2EA043,color:#ffffff,stroke:#1e6a2c,stroke-width:2px; + classDef extPublic fill:#444444,color:#ffffff,stroke:#222222,stroke-width:2px; + classDef cloudflare fill:#F38020,color:#ffffff,stroke:#b35c00,stroke-width:2px; + classDef digitalocean fill:#0069FF,color:#ffffff,stroke:#004bbf,stroke-width:2px; + classDef firewall fill:#D93F0B,color:#ffffff,stroke:#8c2907,stroke-width:2px; + classDef github fill:#24292E,color:#ffffff,stroke:#111417,stroke-width:2px; + classDef docker fill:#2496ED,color:#ffffff,stroke:#1868a6,stroke-width:2px; + classDef rust fill:#000000,color:#DEA584,stroke:#DEA584,stroke-width:2px; + classDef caddy fill:#00ADD8,color:#ffffff,stroke:#007a99,stroke-width:2px; + classDef pg fill:#336791,color:#ffffff,stroke:#234a6a,stroke-width:2px; + classDef redis fill:#DC382D,color:#ffffff,stroke:#9e2820,stroke-width:2px; + + %% EXTERNAL TRAFFIC + Admin["Admin (Trusted VPN IP)"]:::extAdmin + Public["Public Internet"]:::extPublic + + %% CLOUDFLARE + DNS["Cloudflare DNS & Proxy\n(api.nadzu.me)"]:::cloudflare + + %% DIGITALOCEAN + subgraph DigitalOcean ["DigitalOcean Infrastructure"] + + subgraph DO_Firewall ["DO Cloud Firewall"] + direction LR + FW_SSH{"Port 22 (SSH)\nALLOW: Admin"}:::firewall + FW_Web{"Ports 80/443 (HTTP/S)\nALLOW: Cloudflare"}:::firewall + end + + subgraph Droplet ["Ubuntu Droplet Runtime"] + Caddy{"Caddy Reverse Proxy"}:::caddy + Volume[("DO Block Storage\nMounted at /downloads")]:::digitalocean + + subgraph Docker_Compose ["Docker Compose Environment"] + App["Rust Backend API"]:::rust + Worker["YT-DLP / Aria2c Process"]:::rust + WARP["WARP SOCKS5 Proxy"]:::cloudflare + + subgraph DBs ["Data Layer"] + direction LR + PG[(PostgreSQL)]:::pg + Redis[(Redis)]:::redis + end + end + end + end + + ExternalInternet(("External Internet\n(YouTube, GitHub)")):::extPublic + + %% INDEPENDENT REGISTRIES (Placed outside subgraphs to fix layout bugs) + GHCR[("GitHub Container Registry\n(Private)")]:::github + DockerHub[("Docker Hub\n(Public)")]:::docker + + %% ---------------------------------------------------- + %% TRAFFIC FLOW (Rank 1 to 4) + %% ---------------------------------------------------- + Admin -->|"SSH / Deploy"| FW_SSH + Admin -->|"API & Files"| DNS + Public -->|"API Only"| DNS + + DNS ==>|"Proxied Web Traffic"| FW_Web + + FW_SSH -.->|"Access Granted"| Droplet + FW_Web ==>|"Forwards to Web Server"| Caddy + + %% ---------------------------------------------------- + %% CADDY ROUTING & VOLUME TRICK (Rank 5) + %% ---------------------------------------------------- + Caddy ==>|"Public API (/api/v1/*)"| App + Caddy -.->|"SECURE: Admin IP Only\n(/nadun/fs/*)"| Volume + + %% ---------------------------------------------------- + %% APP LOGIC & VOLUME (Rank 6) + %% ---------------------------------------------------- + Volume -.->|"Mounted into"| Worker + App ==>|"Spawns DL Tasks"| Worker + App -->|"Persists Data"| PG + App -->|"Caches State"| Redis + + %% ---------------------------------------------------- + %% OBFUSCATION (Rank 7 & 8) + %% ---------------------------------------------------- + Worker ==>|"Routes via SOCKS5"| WARP + WARP ==>|"Obfuscated Outbound"| ExternalInternet + + %% ---------------------------------------------------- + %% REGISTRY PULLS (Auto-aligns cleanly) + %% ---------------------------------------------------- + GHCR -.->|"Pulls App Image"| App + DockerHub -.->|"Pulls WARP Image"| WARP + + %% STYLES + style DigitalOcean fill:none,stroke:#0069FF,stroke-width:2px,stroke-dasharray: 5 5 + style DO_Firewall fill:none,stroke:#D93F0B,stroke-width:2px + style Droplet fill:none,stroke:#0069FF,stroke-width:1px + style Docker_Compose fill:none,stroke:#2496ED,stroke-width:2px + style DBs fill:none,stroke:none +``` + +
## Features -### Core API Functionality +### Core API -* CORS support. +* CORS handling. * Rate limiting. * API versioning (v1). -* Health checks. -* Logging. -* Postman v3 Collection included. +* Health and root endpoints. +* Structured logging. +* Postman v3 collection included. ### Media Downloading @@ -19,13 +241,13 @@ Personal backend API built with Rust. Focused on concurrency, performance, secur * Download acceleration via aria2c integration. * Job lifecycle management: enqueue, progress tracking, and result retrieval. * Server-Sent Events (SSE) for real-time job progress updates. -* Endpoint to list supported sites. +* Endpoint for listing supported sites. ### Proxy Obfuscation * Bypasses geo-restrictions and anti-bot measures. -* Separate container utilizing the Cloudflare WARP client for outbound requests. -* Uses a custom [**Cloudflare WARP Proxy Docker Image**][docker-hub-image] (1.1k pulls) maintained in [**its dedicated repository**][warp-proxy-repo]. +* Dedicated container that uses the Cloudflare WARP client for outbound requests. +* Uses a custom [**Cloudflare WARP Proxy Docker Image**][docker-hub-image] (1.1k pulls), maintained in [**its dedicated repository**][warp-proxy-repo]. ### Security and Anti-Abuse @@ -33,25 +255,25 @@ Personal backend API built with Rust. Focused on concurrency, performance, secur ### Operational -* CI Pipelines for linting, testing, and building. -* CD pipeline for Docker image building and publishing to GitHub Container Registry, including: +* CI pipelines for linting, testing, and building. +* CD pipeline for Docker image builds and publishing to GitHub Container Registry, including: * zstd compression * zstd builder * custom BuildKit caching for faster builds * multi-platform Docker image support -## Architecture and Design +## Engineering Design * Clean layered architecture (controllers -> services -> models). -* Memory management utilizing DashMap for sharding and weak references for lifecycle control. -* Concurrency control managed via Tokio semaphores. +* Memory management with DashMap sharding and weak references for lifecycle control. +* Concurrency control using Tokio semaphores. -## Development Lifecycle +## Development Workflow -* Complete agile lifecycle for fast development and deployment. +* Iterative development flow designed for fast delivery. * Makefile-first approach for task automation and consistency. * CI pipeline using GitHub Actions for linting (`cargo clippy`), testing (`cargo test`), and building. -* Complete unit and integration test coverage. +* Unit and integration test coverage. * Production-like local development environment using Docker Compose and Caddy with self-signed TLS. * Active [**Public Changelog**][changelog] including release notes. @@ -67,17 +289,10 @@ Personal backend API built with Rust. Focused on concurrency, performance, secur [![DigitalOcean Referral Badge][do-referral-badge]][do-referral-link] -Provisioned via Terraform using Infrastructure as Code principles. +Provisioned with Terraform using Infrastructure as Code principles. * **DigitalOcean Provider:** Droplet provisioning with cloud-init, block volume management, and firewall configuration. -* **Cloudflare Provider:** R2 bucket utilized for Terraform remote state backend and DNS entry management for full HTTPS support. - -
-Infrastructure Diagram - -![Themed Architecture Diagram][arch-diagram] - -
+* **Cloudflare Provider:** R2 bucket used for Terraform remote state and DNS record management for full HTTPS support. ## Project Structure @@ -221,11 +436,10 @@ infra/ [changelog]: https://nadzu.me/posts/rust-backend-changelog/ [do-referral-badge]: https://web-platforms.sfo2.cdn.digitaloceanspaces.com/WWW/Badge%202.svg [do-referral-link]: https://www.digitalocean.com/?refcode=17bb57d3d632&utm_campaign=Referral_Invite&utm_medium=Referral_Program&utm_source=badge -[arch-diagram]: docs/images/Themed-Architecture-Diagram.svg [yt-dlp-repo]: https://github.com/yt-dlp/yt-dlp -## Things i learned +## Things I Learned -- rust: Iniital leraning curve is steep but the long term benefits in performance and safety and low level control are worth it ```❤️``` -- tf: cloud init is perfect to bootstrap the server, it have max size limitation depending on the provider. -- tf: Cloudflare provider only supports R2 buckets; use the AWS Terraform provider for object uploads to R2. \ No newline at end of file +- Rust: The initial learning curve is steep, but the long-term benefits in performance, safety, and low-level control are worth it. +- Terraform: cloud-init is excellent for bootstrapping a server, but it has provider-specific size limits. +- Terraform: The Cloudflare provider only supports R2 buckets; use the AWS Terraform provider for object uploads to R2. \ No newline at end of file diff --git a/docs/images/Themed-Architecture-Diagram-code.md b/docs/images/Themed-Architecture-Diagram-code.md deleted file mode 100644 index 223370c..0000000 --- a/docs/images/Themed-Architecture-Diagram-code.md +++ /dev/null @@ -1,45 +0,0 @@ -GitHub README files don't support Mermaid diagram themes directly, so this diagram is rendered in a separate file and linked here. The code for the diagram is as follows: - -```mermaid ---- -config: - theme: neo-dark - look: neo - layout: dagre ---- -flowchart TB - subgraph Local_CI["Local / CI"] - TF["Terraform CLI"] - Make["Makefile"] - end - subgraph DO_Components_Module["DO Components Module"] - Droplet("do_droplet") - Volume[("do_volume")] - VolAttach("do_volume_attachment") - CloudInit["cloud-init.template"] - end - subgraph Terraform_Architecture["Terraform Architecture"] - RootEnv["Account Environment (naduns-team)"] - R2Backend[("Cloudflare R2 Backend")] - DO_Components_Module - end - subgraph Provisioned_System["Runtime: DigitalOcean Ubuntu Droplet"] - Boot{"Cloud-init Execution"} - Docker["Docker Engine"] - Container(["Nadzu App Container"]) - end - Make -- "Reads .env & Passes TF_VAR" --> TF - TF --> RootEnv - RootEnv -. Reads / Writes State .-> R2Backend - VolAttach -- Links --> Volume - VolAttach -- To --> Droplet - RootEnv -- Provisions --> Droplet & Volume & VolAttach - RootEnv -- Renders --> CloudInit - CloudInit == "Injected & Executed as user-data payload" ===> Boot - Droplet -. Hosts Environment .-> Provisioned_System - Boot -- Writes Secrets & Installs --> Docker - Docker -- Authenticates & Pulls --> GHCR[("GitHub Container Registry")] - GHCR -- Runs Image --> Container - Volume -. Persistent Mount .-> Container - -``` \ No newline at end of file diff --git a/docs/images/Themed-Architecture-Diagram.svg b/docs/images/Themed-Architecture-Diagram.svg deleted file mode 100644 index 9e2ea3a..0000000 --- a/docs/images/Themed-Architecture-Diagram.svg +++ /dev/null @@ -1 +0,0 @@ -

Runtime: DigitalOcean Ubuntu Droplet

Terraform Architecture

Local / CI

DO Components Module

Reads .env & Passes TF_VAR

Reads / Writes State

Links

To

Provisions

Provisions

Provisions

Renders

Injected & Executed as user-data payload

Writes Secrets & Installs

Authenticates & Pulls

Runs Image

Persistent Mount

Hosts Environment

Terraform CLI

Makefile

do_droplet

do_volume

do_volume_attachment

cloud-init.template

Account Environment (naduns-team)

Cloudflare R2 Backend

Cloud-init Execution

Docker Engine

Nadzu App Container

GitHub Container Registry

\ No newline at end of file From 5bdb6f9ae9257901bbb689f4bed96b971816b410 Mon Sep 17 00:00:00 2001 From: Nadu_Dev Date: Sun, 3 May 2026 07:43:48 +0530 Subject: [PATCH 3/7] refactor: improve error handling during file movement in YtdlpManager --- README.md | 203 +++++++++++++-------- src/controllers/api/v1/ytdlp_controller.rs | 82 +++++---- src/services/ytdlp/mod.rs | 25 ++- 3 files changed, 196 insertions(+), 114 deletions(-) diff --git a/README.md b/README.md index 5af284d..5057e9c 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,63 @@ Personal backend API built with Rust, focused on concurrency, performance, security, and long-term maintainability. -## Architecture at a Glance +## Table of Contents +- [Features](#features) +- [Architecture Diagrams](#architecture-diagrams) +- [Engineering Design](#engineering-design) +- [Infrastructure](#infrastructure) +- [Project Structure](#project-structure) +- [Development Workflow](#development-workflow) +- [Packaging and Deployment](#packaging-and-deployment) +- [Things I Learned](#things-i-learned) +- [Acknowledgements](#acknowledgements) + +## Features + +### Core API + +* CORS handling. +* Rate limiting. +* API versioning (v1). +* Health and root endpoints. +* Structured logging. +* Postman v3 collection included. + +### Media Downloading + +* Multi-platform media downloading via yt-dlp. +* Download acceleration via aria2c integration. +* Job lifecycle management: enqueue, progress tracking, and result retrieval. +* Server-Sent Events (SSE) for real-time job progress updates. +* Endpoint for listing supported sites. + +### GitHub Contributions Graph + +* Fetches contribution data from the GitHub GraphQL API. +* Provides an endpoint to retrieve contribution stats for a specified user. + +### Proxy Obfuscation + +* Bypasses geo-restrictions and anti-bot measures. +* Dedicated container that uses the Cloudflare WARP client for outbound requests. +* Uses a custom [**Cloudflare WARP Proxy Docker Image**][docker-hub-image] (1.1k pulls), maintained in [**its dedicated repository**][warp-proxy-repo]. + +### Security and Anti-Abuse + +* CAPTCHA verification middleware powered by reCAPTCHA. +* API Key have Higher rate limits and access to protected endpoints. +* Caddy reverse proxy for TLS termination and IP-based access control. + +### Operational + +* CI pipelines for linting, testing, and building. +* CD pipeline for Docker image builds and publishing to GitHub Container Registry, including: + * zstd compression + * zstd builder + * custom BuildKit caching for faster builds + * multi-platform Docker image support + +## Architecture Diagrams
Core system diagram @@ -224,67 +280,12 @@ flowchart TB
-## Features - -### Core API - -* CORS handling. -* Rate limiting. -* API versioning (v1). -* Health and root endpoints. -* Structured logging. -* Postman v3 collection included. - -### Media Downloading - -* Multi-platform media downloading via yt-dlp. -* Download acceleration via aria2c integration. -* Job lifecycle management: enqueue, progress tracking, and result retrieval. -* Server-Sent Events (SSE) for real-time job progress updates. -* Endpoint for listing supported sites. - -### Proxy Obfuscation - -* Bypasses geo-restrictions and anti-bot measures. -* Dedicated container that uses the Cloudflare WARP client for outbound requests. -* Uses a custom [**Cloudflare WARP Proxy Docker Image**][docker-hub-image] (1.1k pulls), maintained in [**its dedicated repository**][warp-proxy-repo]. - -### Security and Anti-Abuse - -* CAPTCHA verification middleware powered by reCAPTCHA. - -### Operational - -* CI pipelines for linting, testing, and building. -* CD pipeline for Docker image builds and publishing to GitHub Container Registry, including: - * zstd compression - * zstd builder - * custom BuildKit caching for faster builds - * multi-platform Docker image support - ## Engineering Design * Clean layered architecture (controllers -> services -> models). * Memory management with DashMap sharding and weak references for lifecycle control. * Concurrency control using Tokio semaphores. -## Development Workflow - -* Iterative development flow designed for fast delivery. -* Makefile-first approach for task automation and consistency. -* CI pipeline using GitHub Actions for linting (`cargo clippy`), testing (`cargo test`), and building. -* Unit and integration test coverage. -* Production-like local development environment using Docker Compose and Caddy with self-signed TLS. -* Active [**Public Changelog**][changelog] including release notes. - -## Packaging and Deployment - -* Dockerized using multi-stage and multi-platform builds (5-stage build with tini). -* Optimized Rust builds using Cargo-Chef. -* Custom Docker builder implementing ZSTD compression. -* Pre-installed dependencies: yt-dlp Python packages, FFmpeg, and FFprobe for media validation. -* Published automatically to a private GitHub Container Registry. - ## Infrastructure [![DigitalOcean Referral Badge][do-referral-badge]][do-referral-link] @@ -292,7 +293,7 @@ flowchart TB Provisioned with Terraform using Infrastructure as Code principles. * **DigitalOcean Provider:** Droplet provisioning with cloud-init, block volume management, and firewall configuration. -* **Cloudflare Provider:** R2 bucket used for Terraform remote state and DNS record management for full HTTPS support. +* **Cloudflare Provider:** An R2 bucket used for Terraform remote state and DNS record management for full HTTPS support. ## Project Structure @@ -307,20 +308,49 @@ Provisioned with Terraform using Infrastructure as Code principles. |-- Cargo.toml |-- Dockerfile |-- Dockerfile.dev +|-- GEMINI.md |-- LICENSE |-- Makefile |-- README.md |-- docker-compose.dev.yml |-- docker-compose.yml |-- docker-entrypoint.sh -|-- docs -| `-- images -| |-- Themed-Architecture-Diagram-code.md -| `-- Themed-Architecture-Diagram.svg +|-- downloads +|-- infra +| |-- common +| | |-- browse.html +| | |-- certificates +| | | |-- api.nadzu.me.key +| | | `-- api.nadzu.me.pem +| | `-- cloud-init.template +| `-- digitalocean +| |-- accounts +| | `-- naduns-team +| | |-- backend.tf +| | |-- main.tf +| | |-- outputs.tf +| | |-- terraform.tfvars +| | |-- terraform.tfvars.example +| | `-- variables.tf +| `-- components +| |-- c-cloudflare-record.tf +| |-- c-cloudflare-zone.tf +| |-- c-digitalocean-firewall.tf +| |-- data.tf +| |-- locals.tf +| |-- outputs.tf +| |-- provider.tf +| |-- r-digitalocean_droplet.tf +| |-- r-digitalocean_volume.tf +| |-- r-digitalocean_volume_attachment.tf +| |-- variables.tf +| `-- versions.tf |-- nadunssh |-- postman | |-- collections | | `-- Nadzu API +| | |-- Contributions (Default User).request.yaml +| | |-- Contributions (Specific User).request.yaml | | |-- Health.request.yaml | | |-- Root.request.yaml | | |-- Validate User.request.yaml @@ -330,16 +360,21 @@ Provisioned with Terraform using Infrastructure as Code principles. | | |-- YT-DLP List Jobs.request.yaml | | |-- YT-DLP Stream Job Progress.request.yaml | | `-- supported sites.request.yaml -| `-- environments -| `-- Nadzu Local.yaml +| |-- environments +| | |-- Nadzu Local.environment.yaml +| | |-- dev.environment.yaml +| | `-- nadzu prod.environment.yaml |-- rustfmt.toml |-- src | |-- app.rs +| |-- bin +| | `-- config_exit.rs | |-- config.rs | |-- controllers | | |-- api | | | |-- mod.rs | | | `-- v1 +| | | |-- contributions_controller.rs | | | |-- mod.rs | | | `-- ytdlp_controller.rs | | |-- error_controller.rs @@ -365,20 +400,25 @@ Provisioned with Terraform using Infrastructure as Code principles. | | |-- mod.rs | | `-- rate_limit.rs | |-- models -| | |-- health_model.rs +| | |-- contributions.rs +| | |-- github_dto.rs +| | |-- health.rs | | |-- mod.rs -| | |-- validation_model.rs -| | `-- ytdlp_model.rs +| | |-- validation.rs +| | |-- ytdlp.rs +| | `-- ytdlp_dto.rs | |-- routes | | |-- api | | | |-- mod.rs | | | `-- v1 +| | | |-- contributions_routes.rs | | | |-- mod.rs | | | `-- ytdlp_routes.rs | | |-- health_routes.rs | | |-- mod.rs | | `-- validation_routes.rs | |-- services +| | |-- contributions.rs | | |-- mod.rs | | `-- ytdlp | | `-- mod.rs @@ -388,6 +428,7 @@ Provisioned with Terraform using Infrastructure as Code principles. | | |-- auth_tests.rs | | |-- captcha_tests.rs | | |-- common.rs +| | |-- contributions_tests.rs | | |-- cors_tests.rs | | |-- health_tests.rs | | |-- rate_limit_tests.rs @@ -397,6 +438,7 @@ Provisioned with Terraform using Infrastructure as Code principles. | | `-- ytdlp_tests.rs | |-- api_tests.rs | `-- layer_unit_tests.rs + ``` @@ -427,6 +469,29 @@ infra/ +## Development Workflow + +* Iterative development flow designed for fast delivery. +* Makefile-first approach for task automation and consistency. +* CI pipeline using GitHub Actions for linting (`cargo clippy`), testing (`cargo test`), and building. +* Unit and integration test coverage. +* Production-like local development environment using Docker Compose and Caddy with self-signed TLS. +* Active [**Public Changelog**][changelog] including release notes. + +## Packaging and Deployment + +* Dockerized using multi-stage and multi-platform builds (5-stage build with tini). +* Optimized Rust builds using Cargo-Chef. +* Custom Docker builder implementing ZSTD compression. +* Pre-installed dependencies: yt-dlp Python packages, FFmpeg, and FFprobe for media validation. +* Published automatically to a private GitHub Container Registry. + +## Things I Learned + +- Rust: The initial learning curve is steep, but the long-term benefits in performance, safety, and low-level control are worth it. +- Terraform: cloud-init is excellent for bootstrapping a server, but it has provider-specific size limits. +- Terraform: The Cloudflare provider only supports R2 buckets; use the AWS Terraform provider for object uploads to R2. + ## Acknowledgements * [**yt-dlp**][yt-dlp-repo] @@ -437,9 +502,3 @@ infra/ [do-referral-badge]: https://web-platforms.sfo2.cdn.digitaloceanspaces.com/WWW/Badge%202.svg [do-referral-link]: https://www.digitalocean.com/?refcode=17bb57d3d632&utm_campaign=Referral_Invite&utm_medium=Referral_Program&utm_source=badge [yt-dlp-repo]: https://github.com/yt-dlp/yt-dlp - -## Things I Learned - -- Rust: The initial learning curve is steep, but the long-term benefits in performance, safety, and low-level control are worth it. -- Terraform: cloud-init is excellent for bootstrapping a server, but it has provider-specific size limits. -- Terraform: The Cloudflare provider only supports R2 buckets; use the AWS Terraform provider for object uploads to R2. \ No newline at end of file diff --git a/src/controllers/api/v1/ytdlp_controller.rs b/src/controllers/api/v1/ytdlp_controller.rs index cfd960a..2b40359 100644 --- a/src/controllers/api/v1/ytdlp_controller.rs +++ b/src/controllers/api/v1/ytdlp_controller.rs @@ -140,50 +140,56 @@ pub async fn stream_download_progress( } } - while let Ok(updated_id) = broadcast_rx.recv().await { - if updated_id != stream_job_id { - continue; - } + loop { + match broadcast_rx.recv().await { + Ok(updated_id) => { + if updated_id == stream_job_id { + if let Some(job) = manager.get_job(&stream_job_id) { + let job_resp = YtdlpJobResponse::from(job); + let snapshot = serde_json::to_string(&job_resp).unwrap_or_default(); - if let Some(job) = manager.get_job(&stream_job_id) { - let job_resp = YtdlpJobResponse::from(job); - let snapshot = serde_json::to_string(&job_resp).unwrap_or_default(); + if snapshot != last_snapshot { + last_snapshot.clone_from(&snapshot); + if tx + .send(Ok(Event::default().event("progress").data(snapshot))) + .await + .is_err() + { + break; + } + } - if snapshot != last_snapshot { - last_snapshot.clone_from(&snapshot); - if tx - .send(Ok(Event::default().event("progress").data(snapshot))) - .await - .is_err() - { - break; + if matches!( + job_resp.status, + YtdlpJobStatus::Finished | YtdlpJobStatus::Failed + ) { + let _ = tx + .send(Ok(Event::default().event("done").data("done"))) + .await; + info!( + "sse stream complete path={} job_id={} client_ip={} status={:?}", + stream_path, stream_job_id, stream_client_ip, job_resp.status + ); + break; + } + } else { + let _ = tx + .send(Ok(Event::default() + .event("error") + .data(r#"{"status":404,"message":"job not found"}"#))) + .await; + info!( + "sse stream ended path={} job_id={} client_ip={} reason=job_not_found", + stream_path, stream_job_id, stream_client_ip + ); + break; + } } } - - if matches!( - job_resp.status, - YtdlpJobStatus::Finished | YtdlpJobStatus::Failed - ) { - let _ = tx - .send(Ok(Event::default().event("done").data("done"))) - .await; - info!( - "sse stream complete path={} job_id={} client_ip={} status={:?}", - stream_path, stream_job_id, stream_client_ip, job_resp.status - ); + Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => {} + Err(tokio::sync::broadcast::error::RecvError::Closed) => { break; } - } else { - let _ = tx - .send(Ok(Event::default() - .event("error") - .data(r#"{"status":404,"message":"job not found"}"#))) - .await; - info!( - "sse stream ended path={} job_id={} client_ip={} reason=job_not_found", - stream_path, stream_job_id, stream_client_ip - ); - break; } } diff --git a/src/services/ytdlp/mod.rs b/src/services/ytdlp/mod.rs index bddeecc..47d6153 100644 --- a/src/services/ytdlp/mod.rs +++ b/src/services/ytdlp/mod.rs @@ -342,15 +342,32 @@ impl YtdlpManager { ); } else { let mut moved_files = Vec::with_capacity(files.len()); + let mut move_error: Option = None; for file in files { let from = temp_dir.join(&file); let to = PathBuf::from(&output_dir).join(&file); - if fs::rename(&from, &to).await.is_ok() { - moved_files.push(file); + match fs::rename(&from, &to).await { + Ok(()) => moved_files.push(file), + Err(err) => { + move_error.get_or_insert_with(|| { + format!( + "failed to move downloaded file into output directory: {err}" + ) + }); + } } } - self.mark_job_finished(&id, moved_files); - info!("finished ytdlp job id={id}"); + if moved_files.is_empty() { + self.mark_job_failed( + &id, + move_error.unwrap_or_else(|| { + "yt-dlp completed without producing a downloadable file".to_string() + }), + ); + } else { + self.mark_job_finished(&id, moved_files); + info!("finished ytdlp job id={id}"); + } } let _ = fs::remove_dir_all(&temp_dir).await; } From d97e47cfa40f9ba69edf837bddfb3a7b87ea82c0 Mon Sep 17 00:00:00 2001 From: Nadu_Dev Date: Sun, 3 May 2026 08:01:31 +0530 Subject: [PATCH 4/7] refactor: enhance documentation and improve code clarity across multiple modules --- GEMINI.md | 1 + README.md | 2 +- src/config.rs | 14 +++++++++++++- src/error.rs | 9 +++++++++ src/lib.rs | 13 +++++++++++++ src/models/health.rs | 3 +++ src/models/validation.rs | 12 ++++++++++++ src/models/ytdlp.rs | 21 +++++++++++++++++++++ src/state.rs | 6 ++++++ 9 files changed, 79 insertions(+), 2 deletions(-) diff --git a/GEMINI.md b/GEMINI.md index 240d222..b8027b0 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -61,6 +61,7 @@ This document serves as the foundational mandate for all engineering work on thi * **Clippy**: Must be zero-warning. * **Rustfmt**: Must be applied to every file. * **Makefile**: Use `make c` for a full validation suite before concluding any task. +* Use -j (nproc) for parallel builds and tests to speed up the shell commands. ### Documentation * All public-facing methods and services must have `///` (Rustdoc) comments explaining intent and behavior. diff --git a/README.md b/README.md index 5057e9c..ff321be 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ Personal backend API built with Rust, focused on concurrency, performance, secur ### Security and Anti-Abuse * CAPTCHA verification middleware powered by reCAPTCHA. -* API Key have Higher rate limits and access to protected endpoints. +* API keys have higher rate limits and access to protected endpoints. * Caddy reverse proxy for TLS termination and IP-based access control. ### Operational diff --git a/src/config.rs b/src/config.rs index 7bb7e2d..c0ea0cb 100644 --- a/src/config.rs +++ b/src/config.rs @@ -44,23 +44,35 @@ fn env_opt(key: &str) -> Option { .filter(|v| !v.is_empty()) } -/// Application configuration loaded from environment variables. +/// Application configuration settings loaded from the environment. #[derive(Clone)] pub struct AppConfig { + /// The application name. pub name: String, + /// The environment (e.g., "development", "production"). pub env: String, + /// The host to bind to. pub host: String, + /// The port to bind to. pub port: u16, + /// Allowed origins for CORS. pub allowed_origins: Option, + /// Directory for downloaded files. pub download_dir: String, + /// Path to the yt-dlp executable. pub ytdlp_path: String, + /// Optional external downloader for yt-dlp. pub ytdlp_external_downloader: Option, + /// Optional arguments for the external downloader. pub ytdlp_external_downloader_args: Option, + /// Maximum concurrent yt-dlp downloads. pub max_concurrent_downloads: usize, captcha_secret_key: Option, master_api_key: String, github_pat: Option, + /// GitHub username for contributions. pub github_username: Option, + /// GitHub GraphQL API URL. pub github_graphql_url: String, } diff --git a/src/error.rs b/src/error.rs index 8b54416..f087c9e 100644 --- a/src/error.rs +++ b/src/error.rs @@ -17,29 +17,38 @@ struct ErrorResponse { error_code: Option>, } +/// Represents the various errors that can occur within the application. #[derive(Debug, Error)] pub enum AppError { + /// An internal server error, typically wrapping an `anyhow::Error`. #[error("Internal Server Error")] Internal(#[from] anyhow::Error), + /// A requested resource was not found. #[error("Not Found: {0}")] NotFound(String), + /// A validation error occurred on the incoming request. #[error("Validation failed: {0}")] Validation(String), + /// The client is not authorized to access the resource. #[error("Unauthorized: {0}")] Unauthorized(String), + /// The client is forbidden from accessing the resource. #[error("Forbidden: {0}")] Forbidden(String), + /// A conflict occurred (e.g., duplicate resource). #[error("Conflict: {0}")] Conflict(String), + /// An error occurred communicating with an upstream service. #[error("Upstream Service Error: {0}")] UpstreamError(String), + /// The service is currently unavailable. #[error("Service Unavailable: {0}")] ServiceUnavailable(String), } diff --git a/src/lib.rs b/src/lib.rs index af8bdc1..6eb3b09 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,10 +1,23 @@ +//! The root library module for Nadzu API. +//! This module exposes all core application modules. + +/// Application startup and runtime orchestration. pub mod app; +/// Configuration management. pub mod config; +/// HTTP controllers for routing. pub mod controllers; +/// Error types and handling. pub mod error; +/// Request extractors. pub mod extractors; +/// Tower middleware implementations. pub mod middleware; +/// Data structures and domain models. pub mod models; +/// Application routing definitions. pub mod routes; +/// Business logic and core services. pub mod services; +/// Global application state. pub mod state; diff --git a/src/models/health.rs b/src/models/health.rs index 9695383..2fa2603 100644 --- a/src/models/health.rs +++ b/src/models/health.rs @@ -1,8 +1,11 @@ use serde::Serialize; +/// Represents the health status of the application. #[derive(Serialize, Debug)] pub struct Health { + /// The current status ("ok"). pub status: &'static str, + /// The version of the application. pub version: &'static str, } diff --git a/src/models/validation.rs b/src/models/validation.rs index 399d6c3..2d8b0df 100644 --- a/src/models/validation.rs +++ b/src/models/validation.rs @@ -1,8 +1,10 @@ use serde::{Deserialize, Serialize}; use validator::Validate; +/// The request payload for validating a user. #[derive(Debug, Deserialize, Validate)] pub struct ValidateUserRequest { + /// The name of the user, must be between 3 and 20 characters. #[validate(length( min = 3, max = 20, @@ -10,23 +12,33 @@ pub struct ValidateUserRequest { ))] pub name: String, + /// The email address of the user. #[validate(email(message = "Invalid email address"))] pub email: String, + /// The age of the user, must be between 18 and 30. #[validate(range(min = 18, max = 30, message = "Age must be between 18 and 30"))] pub age: u8, } +/// The response returned when a user validation is successful. #[derive(Debug, Serialize)] pub struct ValidatedUserResponse { + /// Indicates if the validation was successful. pub success: bool, + /// An informational message regarding the validation result. pub message: std::borrow::Cow<'static, str>, + /// The validated user data. pub data: UserData, } +/// The inner user data returned upon successful validation. #[derive(Debug, Serialize)] pub struct UserData { + /// The validated name. pub name: String, + /// The validated email. pub email: String, + /// The validated age. pub age: u8, } diff --git a/src/models/ytdlp.rs b/src/models/ytdlp.rs index d7b0962..c4682f5 100644 --- a/src/models/ytdlp.rs +++ b/src/models/ytdlp.rs @@ -4,29 +4,50 @@ use serde::{Deserialize, Serialize}; /// This includes internal state like `output_dir` and `format_flag` which are not exposed to clients. #[derive(Debug, Serialize, Clone)] pub struct YtdlpJob { + /// The unique identifier of the job. pub id: String, + /// The URL being downloaded. pub url: String, + /// The current status of the job. pub status: YtdlpJobStatus, + /// Internal: The output directory for downloaded files. pub output_dir: String, + /// Internal: The format flag passed to yt-dlp. pub format_flag: String, + /// Internal: The sort flag passed to yt-dlp. pub sort_flag: Option, + /// UNIX timestamp when the job started. pub started_at_unix: Option, + /// UNIX timestamp when the job finished. pub finished_at_unix: Option, + /// The current progress percentage. pub progress_percent: Option, + /// The total size of the download if known. pub progress_total: Option, + /// The current download speed. pub progress_speed: Option, + /// The estimated time of arrival. pub progress_eta: Option, + /// General progress message or extraction details. pub progress_message: Option, + /// UNIX timestamp when the job was last updated. pub updated_at_unix: Option, + /// The list of files downloaded by the job. pub files: Option>, + /// Any error message encountered during the job. pub error: Option, } +/// The status of a yt-dlp download job. #[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)] #[serde(rename_all = "snake_case")] pub enum YtdlpJobStatus { + /// The job is queued and waiting to start. Queued, + /// The job is currently running. Running, + /// The job has finished successfully. Finished, + /// The job failed. Failed, } diff --git a/src/state.rs b/src/state.rs index b2cb710..aa71e84 100644 --- a/src/state.rs +++ b/src/state.rs @@ -4,11 +4,17 @@ use crate::services::contributions::ContributionsService; use crate::services::ytdlp::YtdlpManager; use std::sync::Arc; +/// Represents the global application state shared across all routes and services. #[derive(Clone, Debug)] pub struct AppState { + /// Global application configuration. pub config: Arc, + /// Manager for YT-DLP processes and jobs. pub ytdlp_manager: Arc, + /// Rate limiters for different endpoints. pub rate_limiters: Arc, + /// Global HTTP client for making outbound requests. pub http_client: reqwest::Client, + /// Service for handling GitHub contribution data. pub contributions_service: Arc, } From cedfeaf4506e8321fee4b9e380f94072ab95fe6a Mon Sep 17 00:00:00 2001 From: Nadu_Dev Date: Tue, 5 May 2026 11:26:40 +0530 Subject: [PATCH 5/7] chore: update .gitignore and Cargo.lock for dependency management; enhance variable documentation in Terraform files --- .gitignore | 1 + Cargo.lock | 84 +++++++++---------- .../accounts/naduns-team/variables.tf | 7 ++ infra/digitalocean/components/locals.tf | 2 + 4 files changed, 52 insertions(+), 42 deletions(-) diff --git a/.gitignore b/.gitignore index 367ad50..c1a3cc6 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,4 @@ nadunssh id_rsa id_ed25519 /postman/collections/*/.resources/definition.yaml +.agents diff --git a/Cargo.lock b/Cargo.lock index c8de799..5bcec6e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -56,9 +56,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f9ee0f6e02ffd7ad5816e9464499fba7b3effd01123b515c41d1697c43dad1" +checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac" dependencies = [ "compression-codecs", "compression-core", @@ -194,9 +194,9 @@ checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "cc" -version = "1.2.60" +version = "1.2.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20" +checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d" dependencies = [ "find-msvc-tools", "jobserver", @@ -218,9 +218,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "compression-codecs" -version = "0.4.37" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" +checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf" dependencies = [ "brotli", "compression-core", @@ -232,9 +232,9 @@ dependencies = [ [[package]] name = "compression-core" -version = "0.4.31" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" +checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789" [[package]] name = "crc32fast" @@ -544,9 +544,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733" dependencies = [ "atomic-waker", "bytes", @@ -816,9 +816,9 @@ dependencies = [ [[package]] name = "idna_adapter" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" dependencies = [ "icu_normalizer", "icu_properties", @@ -868,9 +868,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.95" +version = "0.3.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca" +checksum = "a1840c94c045fbcf8ba2812c95db44499f7c64910a912551aaaa541decebcacf" dependencies = [ "cfg-if", "futures-util", @@ -886,9 +886,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.185" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" [[package]] name = "litemap" @@ -1376,9 +1376,9 @@ checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" [[package]] name = "rustls" -version = "0.23.38" +version = "0.23.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21" +checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" dependencies = [ "once_cell", "ring", @@ -1390,9 +1390,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" dependencies = [ "web-time", "zeroize", @@ -1400,9 +1400,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.12" +version = "0.103.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" dependencies = [ "ring", "rustls-pki-types", @@ -1680,9 +1680,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.52.0" +version = "1.52.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a91135f59b1cbf38c91e73cf3386fca9bb77915c45ce2771460c9d92f0f3d776" +checksum = "110a78583f19d5cdb2c5ccf321d1290344e71313c6c37d43520d386027d18386" dependencies = [ "bytes", "libc", @@ -2003,18 +2003,18 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.2+wasi-0.2.9" +version = "1.0.3+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.118" +version = "0.2.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89" +checksum = "df52b6d9b87e0c74c9edfa1eb2d9bf85e5d63515474513aa50fa181b3c4f5db1" dependencies = [ "cfg-if", "once_cell", @@ -2025,9 +2025,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.68" +version = "0.4.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f371d383f2fb139252e0bfac3b81b265689bf45b6874af544ffa4c975ac1ebf8" +checksum = "af934872acec734c2d80e6617bbb5ff4f12b052dd8e6332b0817bce889516084" dependencies = [ "js-sys", "wasm-bindgen", @@ -2035,9 +2035,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.118" +version = "0.2.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed" +checksum = "78b1041f495fb322e64aca85f5756b2172e35cd459376e67f2a6c9dffcedb103" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2045,9 +2045,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.118" +version = "0.2.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904" +checksum = "9dcd0ff20416988a18ac686d4d4d0f6aae9ebf08a389ff5d29012b05af2a1b41" dependencies = [ "bumpalo", "proc-macro2", @@ -2058,18 +2058,18 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.118" +version = "0.2.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129" +checksum = "49757b3c82ebf16c57d69365a142940b384176c24df52a087fb748e2085359ea" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" -version = "0.3.95" +version = "0.3.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d" +checksum = "2eadbac71025cd7b0834f20d1fe8472e8495821b4e9801eb0a60bd1f19827602" dependencies = [ "js-sys", "wasm-bindgen", @@ -2087,9 +2087,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" dependencies = [ "rustls-pki-types", ] @@ -2303,9 +2303,9 @@ dependencies = [ [[package]] name = "wit-bindgen" -version = "0.51.0" +version = "0.57.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" [[package]] name = "writeable" diff --git a/infra/digitalocean/accounts/naduns-team/variables.tf b/infra/digitalocean/accounts/naduns-team/variables.tf index f43eb54..bdca09e 100644 --- a/infra/digitalocean/accounts/naduns-team/variables.tf +++ b/infra/digitalocean/accounts/naduns-team/variables.tf @@ -161,39 +161,46 @@ variable "WARP_LICENSE_KEY" { } variable "CLOUDFLARE_API_TOKEN" { + //SECRET: Expected to be set via root TF_VAR_CLOUDFLARE_API_TOKEN. never Declare description = "Cloudflare API token" type = string sensitive = true } variable "CLOUDFLARE_ZONE_NAME" { + //VAR: Declare on terraform.tfvars description = "Cloudflare zone name (e.g. nadzu.me)" type = string } variable "CLOUDFLARE_RECORD_NAME" { + //VAR: Declare on terraform.tfvars description = "Cloudflare DNS record name (e.g. api)" type = string } variable "CLOUDFLARE_RECORD_PROXIED" { + //VAR: Declare on terraform.tfvars description = "Whether the Cloudflare record should be proxied" type = bool default = true } variable "SSH_ALLOWED_IPS" { + //VAR: Declare on terraform.tfvars description = "List of IP addresses allowed to SSH into the droplet" type = list(string) } variable "MASTER_API_KEY" { + //SECRET: Expected to be set via root TF_VAR_MASTER_API_KEY. never Declare description = "Master API key for the application" type = string sensitive = true } variable "CADDY_CUSTOM_BROWSE_FILE_URL" { + //VAR: Declare on terraform.tfvars description = "Presigned URL to download custom browse.html" type = string } diff --git a/infra/digitalocean/components/locals.tf b/infra/digitalocean/components/locals.tf index ddd00a7..3305daf 100644 --- a/infra/digitalocean/components/locals.tf +++ b/infra/digitalocean/components/locals.tf @@ -4,6 +4,8 @@ locals { var.ENVIRONMENT ] + # Deterministic path guaranteed by DigitalOcean via SCSI hardware ID. + # Prevents accidentally formatting the wrong drive since paths like /dev/sdb can change. volume_device_by_id = "/dev/disk/by-id/scsi-0DO_Volume_${var.VOLUME_NAME}" cloud_init = templatefile("${path.module}/../../common/cloud-init.template", { From e0eebe390dca6db85979514ab9f969e41a11e110 Mon Sep 17 00:00:00 2001 From: Nadu_Dev Date: Wed, 6 May 2026 06:58:11 +0530 Subject: [PATCH 6/7] refactor: update API key handling and improve middleware structure; enhance documentation and validation --- .github/PULL_REQUEST_TEMPLATE.md | 6 +- GEMINI.md | 4 +- TODO.md | 124 ++++++++++++++++++ src/config.rs | 3 +- .../api/v1/contributions_controller.rs | 2 +- src/controllers/api/v1/ytdlp_controller.rs | 2 +- src/controllers/health_controller.rs | 1 + src/controllers/root_controller.rs | 2 +- src/controllers/validation_controller.rs | 4 +- src/db/mod.rs | 2 + .../{validated_json.rs => json_validator.rs} | 1 + src/extractors/mod.rs | 3 +- src/middleware/api_key.rs | 28 +++- src/middleware/auth.rs | 26 ---- src/middleware/captcha.rs | 20 ++- src/middleware/cors.rs | 30 +++-- src/middleware/mod.rs | 7 +- src/middleware/rate_limit.rs | 34 ++++- src/routes/api/v1/ytdlp_routes.rs | 2 +- tests/api/auth_tests.rs | 6 +- tests/api/captcha_tests.rs | 6 +- tests/api/common.rs | 2 +- tests/api/cors_tests.rs | 5 + tests/api/rate_limit_tests.rs | 4 +- tests/api/ytdlp_tests.rs | 6 +- tests/layer_unit_tests.rs | 6 +- 26 files changed, 251 insertions(+), 85 deletions(-) create mode 100644 TODO.md rename src/extractors/{validated_json.rs => json_validator.rs} (97%) delete mode 100644 src/middleware/auth.rs diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index f0d20e5..7c128ed 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,10 +1,10 @@ ## Description - + ## Types of changes - + - [ ] ## Checklist - [ ] Updated ChangeLog -- [ ] +- [ ] All tests passed. diff --git a/GEMINI.md b/GEMINI.md index b8027b0..ac2d640 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -1,6 +1,6 @@ # Nadzu Backend - Engineering Standards & Policies -This document serves as the foundational mandate for all engineering work on this codebase. It applies to both human developers and AI agents. Strict adherence is required to maintain the "Anti-Corruption Layer" and high-performance nature of the system. +This document serves as the foundational mandate for all engineering work on naduns codebase. It applies to both human developers and AI agents. Strict adherence is required to maintain the security and high-performance nature of the system. ## 1. Architectural Integrity @@ -64,7 +64,7 @@ This document serves as the foundational mandate for all engineering work on thi * Use -j (nproc) for parallel builds and tests to speed up the shell commands. ### Documentation -* All public-facing methods and services must have `///` (Rustdoc) comments explaining intent and behavior. +* All public-facing methods and services must have `///` (Rustdoc) comments explaining intent and behavior.do not over document, make guesses about the unseen code. * Complex logic (like the Midnight Snap caching strategy) must be documented inline. --- diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..ad0ce7b --- /dev/null +++ b/TODO.md @@ -0,0 +1,124 @@ +# Architectural Improvement Plan +> Based on design review of `nxdun/rust-codebase`. Priority ordered. Each task has a WHY. + +--- + +## TASK 1 — Introduce a Bounded Download Job Queue +**Priority: CRITICAL** +**Files affected:** `src/services/ytdlp/`, `src/controllers/` (ytdlp), `src/state.rs` + +### What to do +- Create a `JobQueue` struct wrapping a `tokio::sync::Semaphore` with a configurable `MAX_CONCURRENT_DOWNLOADS` env cap (e.g. 3). +- Assign each download request a `job_id` (UUID) on submission. +- Store job state (`Pending | Running | Done | Failed`) in Redis with a TTL of ~1 hour. +- The controller returns `202 Accepted` + `{ job_id }` immediately instead of blocking. +- Add a `GET /api/v1/ytdlp/job/:job_id` polling endpoint that reads status from Redis. + +### Why +Right now `tokio::process::Command` is spawned directly per request with no concurrency cap. Two concurrent users = two yt-dlp processes. Ten users = ten processes. This is a fork bomb. The app will OOM or the OS will kill processes randomly. The `202 + poll` pattern is the industry standard for long-running media jobs (YouTube itself uses it). This is the single highest-risk issue in the current design. + +--- + +## TASK 2 — Break AppState into Scoped Injection +**Priority: HIGH** +**Files affected:** `src/state.rs`, `src/app.rs`, all `src/controllers/`, all `src/services/` + +### What to do +- Split `AppState` into two purpose-scoped structs: + - `InfraState` — holds `PgPool`, `RedisPool`, `http_client`, `config` + - `AppState` — holds `ytdlp_manager`, `rate_limiters`, `contributions_service` (business layer) +- Inject `InfraState` directly into services via constructor, not through AppState pass-through. +- Services should not need to reach through AppState to get a DB pool. + +### Why +The current `AppState` in [`src/state.rs`](https://github.com/nxdun/rust-codebase/blob/554569cd0754247fa9020c8745174ef5087e4628/src/state.rs) mixes infrastructure handles (`http_client`, `config`) with business service instances (`ytdlp_manager`, `contributions_service`). This means every unit test for a service must construct the entire AppState including all infra. Splitting them makes services independently testable and removes the implicit coupling between unrelated concerns. + +--- + +## TASK 3 — Add Signed URL File Serving +**Priority: HIGH** +**Files affected:** Caddyfile, new `src/controllers/files.rs`, `src/routes/` + +### What to do +- Remove the direct Caddy volume serve on `/nadun/fs/*` (IP-only gating). +- Add a `POST /api/v1/ytdlp/job/:job_id/download-link` endpoint in Rust that: + - Verifies job is `Done` + - Generates a signed token: `HMAC-SHA256(job_id + expiry_timestamp, SECRET_KEY)` + - Returns `{ url: "/files/:token", expires_at }` +- Add a `GET /files/:token` route that validates the token and streams the file from the volume. + +### Why +The current design serves raw files from block storage gated only by a Caddy IP check. If Cloudflare IP ranges change, or the Caddyfile is misconfigured once, the entire `/downloads` volume becomes publicly browsable. Signed short-lived URLs are the standard (S3 pre-signed URLs work this way). This protects other users' downloaded files from being enumerated. + +--- + +## TASK 4 — Add Explicit Concurrency Annotations to Diagrams +**Priority: MEDIUM** +**Files affected:** `README.md` / diagram source + +### What to do +- On the Core System diagram, add a `JobQueue [Semaphore, max=N]` node between `YtCtrl` and `YtSvc`. +- On the Infrastructure diagram, annotate the Volume serve path with `[signed token required]`. +- Add a note on the Droplet subgraph: `Single-node, no HA — intentional for portfolio scope`. + +### Why +Right now the diagrams imply things that aren't true (unlimited worker concurrency, unguarded file access). A reviewer — recruiter, senior engineer, or tech lead — will spot the missing pieces. Annotating known limitations intentionally signals maturity. "I know this is single-node and here's why" is infinitely better than leaving them to wonder if you missed it. + +--- + +## TASK 5 — Add `src/services/ytdlp/queue.rs` Module +**Priority: MEDIUM (enables Task 1)** +**Files affected:** `src/services/ytdlp/` (new file) + +### What to do +Create `src/services/ytdlp/queue.rs` with: +```rust +pub struct DownloadJob { + pub id: Uuid, + pub url: String, + pub status: JobStatus, + pub created_at: DateTime, +} + +pub enum JobStatus { + Pending, + Running, + Done { output_path: PathBuf }, + Failed { reason: String }, +} + +pub struct JobQueue { + semaphore: Arc, + redis: Arc, +} +``` + +### Why +The `src/services/ytdlp/` directory currently exists but has no queue abstraction. This is the concrete module that implements Task 1. Without a typed `JobStatus` enum, job state gets stored as raw strings in Redis — a source of bugs when reading back state. + +--- + +## TASK 6 — Write Integration Tests for Download Flow +**Priority: LOW (but visible to recruiters)** +**Files affected:** `tests/` + +### What to do +- Add `tests/ytdlp_integration.rs` that mocks `tokio::process::Command` output. +- Test: submit job → get `202` + `job_id` → poll until `Done` → verify file token endpoint returns valid URL. +- Use `wiremock` or `httpmock` for external service mocking. + +### Why +The `tests/` directory exists but is currently sparse. For a portfolio backend, integration tests are the difference between "claims it works" and "proves it works". This is the first thing a technical reviewer will check after reading the README. + +--- + +## Summary Table + +| Task | Impact | Effort | Do First? | +|------|--------|--------|-----------| +| 1 — Job Queue | Prevents OOM / production crash | Medium | YES | +| 2 — Split AppState | Testability, clean architecture | Low | YES | +| 3 — Signed URLs | Security, file privacy | Medium | YES | +| 4 — Diagram annotations | Recruiter impression | Trivial | Quick win | +| 5 — `queue.rs` module | Enables Task 1 | Low | With Task 1 | +| 6 — Integration tests | Portfolio credibility | Medium | After 1-3 | \ No newline at end of file diff --git a/src/config.rs b/src/config.rs index c0ea0cb..2d3edd2 100644 --- a/src/config.rs +++ b/src/config.rs @@ -164,7 +164,8 @@ impl AppConfig { )) } - /// Securely checks if the provided key matches the master API key using constant-time comparison. + /// check API KEY match with provided + constant time eq + /// uses: api key validation middleware #[must_use] pub fn check_api_key(&self, provided_key: &str) -> bool { constant_time_eq(provided_key, &self.master_api_key) diff --git a/src/controllers/api/v1/contributions_controller.rs b/src/controllers/api/v1/contributions_controller.rs index 9913fe6..ebc5a15 100644 --- a/src/controllers/api/v1/contributions_controller.rs +++ b/src/controllers/api/v1/contributions_controller.rs @@ -6,7 +6,7 @@ use serde::Deserialize; use crate::{error::AppError, models::contributions::ContributionsResponse, state::AppState}; -/// Request query parameters for the contributions endpoint. +/// Request struct #[derive(Debug, Deserialize)] pub struct ContributionsQuery { pub username: Option, diff --git a/src/controllers/api/v1/ytdlp_controller.rs b/src/controllers/api/v1/ytdlp_controller.rs index 2b40359..750c03f 100644 --- a/src/controllers/api/v1/ytdlp_controller.rs +++ b/src/controllers/api/v1/ytdlp_controller.rs @@ -17,7 +17,7 @@ use tracing::info; use crate::{ error::AppError, - extractors::validated_json::ValidatedJson, + extractors::json_validator::ValidatedJson, models::{ ytdlp::YtdlpJobStatus, ytdlp_dto::{ diff --git a/src/controllers/health_controller.rs b/src/controllers/health_controller.rs index 4292af7..6f21ccd 100644 --- a/src/controllers/health_controller.rs +++ b/src/controllers/health_controller.rs @@ -2,6 +2,7 @@ use crate::{error::AppError, models::health::Health, state::AppState}; use axum::{Json, extract::State}; /// Health check endpoint. +/// /health pub async fn check_health(State(_state): State) -> Result, AppError> { Ok(Json(Health::ok())) } diff --git a/src/controllers/root_controller.rs b/src/controllers/root_controller.rs index ab6d778..a470b51 100644 --- a/src/controllers/root_controller.rs +++ b/src/controllers/root_controller.rs @@ -2,7 +2,7 @@ use crate::state::AppState; use axum::{Json, extract::State}; use serde_json::{Value, json}; -/// Simple root endpoint to verify the service is running. +/// root endpoint response. pub async fn root_handler(State(state): State) -> Json { let cfg = &state.config; Json(json!({ diff --git a/src/controllers/validation_controller.rs b/src/controllers/validation_controller.rs index 574f5db..f6bd398 100644 --- a/src/controllers/validation_controller.rs +++ b/src/controllers/validation_controller.rs @@ -1,12 +1,12 @@ use crate::{ error::AppError, - extractors::validated_json::ValidatedJson, + extractors::json_validator::ValidatedJson, models::validation::{UserData, ValidateUserRequest, ValidatedUserResponse}, }; use axum::Json; use std::borrow::Cow; -/// Validates user data using the custom validator extractor. +/// Validates user data/ pub async fn validate_user( ValidatedJson(payload): ValidatedJson, ) -> Result, AppError> { diff --git a/src/db/mod.rs b/src/db/mod.rs index b51b1df..1e8f85b 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -1,2 +1,4 @@ +/// unused pub mod postgres; +/// unused pub mod redis; diff --git a/src/extractors/validated_json.rs b/src/extractors/json_validator.rs similarity index 97% rename from src/extractors/validated_json.rs rename to src/extractors/json_validator.rs index 838760d..c326214 100644 --- a/src/extractors/validated_json.rs +++ b/src/extractors/json_validator.rs @@ -9,6 +9,7 @@ use validator::Validate; use crate::error::AppError; +///wrapped,automatic: JSON validator ;] #[derive(Debug)] pub struct ValidatedJson(pub T); diff --git a/src/extractors/mod.rs b/src/extractors/mod.rs index 7b0daa0..3dcf897 100644 --- a/src/extractors/mod.rs +++ b/src/extractors/mod.rs @@ -1 +1,2 @@ -pub mod validated_json; +/// Json validator module for the API. +pub mod json_validator; diff --git a/src/middleware/api_key.rs b/src/middleware/api_key.rs index 8e959de..e075470 100644 --- a/src/middleware/api_key.rs +++ b/src/middleware/api_key.rs @@ -1,12 +1,32 @@ -use axum::http::HeaderMap; +use axum::{ + extract::{Request, State}, + http::HeaderMap, + middleware::Next, + response::Response, +}; -use crate::{config::AppConfig, middleware::X_API_KEY}; +use crate::{config::AppConfig, error::AppError, middleware::HEADER_API_KEY, state::AppState}; -/// Checks if the request headers contain a valid master API key. +/// validation.inexpensive: Checks if the request headers contain a valid master API key. #[must_use] pub fn has_valid_master_api_key(headers: &HeaderMap, config: &AppConfig) -> bool { headers - .get(X_API_KEY) + .get(HEADER_API_KEY) .and_then(|v| v.to_str().ok()) .is_some_and(|v| config.check_api_key(v)) } + +/// Middleware that requires a valid master API key to be present in the headers. +pub async fn require_api_key( + State(state): State, + req: Request, + next: Next, +) -> Result { + if has_valid_master_api_key(req.headers(), state.config.as_ref()) { + Ok(next.run(req).await) + } else { + Err(AppError::Unauthorized( + "Invalid or missing API key".to_string(), + )) + } +} diff --git a/src/middleware/auth.rs b/src/middleware/auth.rs deleted file mode 100644 index 0c4eae2..0000000 --- a/src/middleware/auth.rs +++ /dev/null @@ -1,26 +0,0 @@ -use axum::{ - extract::{Request, State}, - middleware::Next, - response::Response, -}; - -use crate::{error::AppError, middleware::X_API_KEY, state::AppState}; - -/// Middleware that requires a valid master API key to be present in the headers. -pub async fn require_api_key( - State(state): State, - req: Request, - next: Next, -) -> Result { - let api_key = req - .headers() - .get(X_API_KEY) - .and_then(|value| value.to_str().ok()); - - match api_key { - Some(key) if state.config.check_api_key(key) => Ok(next.run(req).await), - _ => Err(AppError::Unauthorized( - "Invalid or missing API key".to_string(), - )), - } -} diff --git a/src/middleware/captcha.rs b/src/middleware/captcha.rs index 410e200..3663ae7 100644 --- a/src/middleware/captcha.rs +++ b/src/middleware/captcha.rs @@ -8,10 +8,24 @@ use serde::Deserialize; use crate::{ error::AppError, - middleware::{X_CAPTCHA_TOKEN, api_key::has_valid_master_api_key}, + middleware::{HEADER_CAPTCHA_NAME, api_key::has_valid_master_api_key}, state::AppState, }; +const CAPTCHA_VERIFY_TIMEOUT_SECS: u64 = 10; + +// Checkpoint +const _: () = { + assert!( + CAPTCHA_VERIFY_TIMEOUT_SECS > 0, + "CAPTCHA_VERIFY_TIMEOUT_SECS must be positive" + ); + assert!( + CAPTCHA_VERIFY_TIMEOUT_SECS <= 60, + "CAPTCHA_VERIFY_TIMEOUT_SECS is unusually high (> 1min)" + ); +}; + #[derive(Debug, Deserialize)] struct CaptchaProviderResponse { success: bool, @@ -34,7 +48,7 @@ pub async fn verify_captcha_token( let captcha_token = req .headers() - .get(X_CAPTCHA_TOKEN) + .get(HEADER_CAPTCHA_NAME) .and_then(|value| value.to_str().ok()) .map(str::trim) .filter(|value| !value.is_empty()) @@ -51,7 +65,7 @@ pub async fn verify_captcha_token( let response = state .http_client .post("https://www.google.com/recaptcha/api/siteverify") - .timeout(std::time::Duration::from_secs(10)) + .timeout(std::time::Duration::from_secs(CAPTCHA_VERIFY_TIMEOUT_SECS)) .form(&[("secret", secret_key), ("response", captcha_token)]) .send() .await diff --git a/src/middleware/cors.rs b/src/middleware/cors.rs index b828a61..85a6195 100644 --- a/src/middleware/cors.rs +++ b/src/middleware/cors.rs @@ -1,6 +1,6 @@ use crate::{ config::AppConfig, - middleware::{X_API_KEY, X_CAPTCHA_TOKEN}, + middleware::{HEADER_API_KEY, HEADER_CAPTCHA_NAME}, }; use axum::http::{ Method, @@ -16,6 +16,16 @@ enum OriginMatcher { Wildcard { prefix: String, suffix: String }, } +const DEFAULT_METHODS: [Method; 2] = [Method::GET, Method::POST]; + +// Checkpoint +const _: () = { + assert!( + DEFAULT_METHODS.len() == 2, + "CORS should only allow 2 methods" + ); +}; + pub fn build_cors(config: &AppConfig) -> CorsLayer { let allowed_origins_env = config.allowed_origins.clone().unwrap_or_default(); let raw_origins: Vec = allowed_origins_env @@ -24,26 +34,18 @@ pub fn build_cors(config: &AppConfig) -> CorsLayer { .filter(|s| !s.is_empty()) .collect(); - let default_methods = [ - Method::GET, - Method::POST, - Method::PUT, - Method::PATCH, - Method::DELETE, - ]; - if raw_origins.is_empty() || raw_origins == ["none"] { info!("CORS disabled (no origins allowed)"); return CorsLayer::new() .allow_origin([]) - .allow_methods(default_methods); + .allow_methods(DEFAULT_METHODS); } if raw_origins.contains(&"*".to_string()) { info!("CORS enabled for all origins (wildcard)"); return CorsLayer::new() .allow_origin(Any) - .allow_methods(default_methods) + .allow_methods(DEFAULT_METHODS) .allow_headers(Any); } @@ -106,13 +108,13 @@ pub fn build_cors(config: &AppConfig) -> CorsLayer { CorsLayer::new() .allow_origin(allow_origin) - .allow_methods(default_methods) + .allow_methods(DEFAULT_METHODS) .allow_headers([ AUTHORIZATION, CONTENT_TYPE, ACCEPT, - X_API_KEY, - X_CAPTCHA_TOKEN, + HEADER_API_KEY, + HEADER_CAPTCHA_NAME, ]) .allow_credentials(true) } diff --git a/src/middleware/mod.rs b/src/middleware/mod.rs index 70af45a..9f72a9d 100644 --- a/src/middleware/mod.rs +++ b/src/middleware/mod.rs @@ -1,13 +1,14 @@ use axum::http::header::HeaderName; pub mod api_key; -pub mod auth; pub mod captcha; pub mod cors; pub mod rate_limit; -pub const X_API_KEY: HeaderName = HeaderName::from_static("x-api-key"); -pub const X_CAPTCHA_TOKEN: HeaderName = HeaderName::from_static("x-captcha-token"); +pub const HEADER_API_KEY: HeaderName = HeaderName::from_static("x-api-key"); +pub const HEADER_CAPTCHA_NAME: HeaderName = HeaderName::from_static("x-captcha-token"); + +// Checkpoint /// Performs a constant-time comparison of two strings to prevent timing attacks. #[must_use] diff --git a/src/middleware/rate_limit.rs b/src/middleware/rate_limit.rs index 542379c..78c2dbd 100644 --- a/src/middleware/rate_limit.rs +++ b/src/middleware/rate_limit.rs @@ -23,11 +23,31 @@ const RATE_LIMITER_BURST_SIZE: u32 = 20; const ENHANCED_RATE_LIMITER_PER_SECOND: u32 = 50; const ENHANCED_RATE_LIMITER_BURST_SIZE: u32 = 100; +// Checkpoint +const _: () = { + assert!( + RATE_LIMITER_PER_SECOND > 0 + && RATE_LIMITER_BURST_SIZE > 0 + && ENHANCED_RATE_LIMITER_PER_SECOND > 0 + && ENHANCED_RATE_LIMITER_BURST_SIZE > 0, + "Rate limit constants must be positive" + ); + assert!( + RATE_LIMITER_PER_SECOND <= ENHANCED_RATE_LIMITER_PER_SECOND, + "Normal rate limit cannot exceed enhanced rate limit" + ); + assert!( + RATE_LIMITER_BURST_SIZE <= ENHANCED_RATE_LIMITER_BURST_SIZE, + "Normal burst size cannot exceed enhanced burst size" + ); +}; + type KeyedLimiter = RateLimiter, DefaultClock, NoOpMiddleware>; -/// Container for the application's rate limiting logic. -/// Supports a "Normal" tier for public users and an "Enhanced" tier for authorized API key holders. +/// Implements tiered rate limiting. request ip based. +/// enhanced: requests only with correct API key. +/// normal: all other requests. #[derive(Clone, Debug)] pub struct RateLimiters { normal: Arc, @@ -35,10 +55,9 @@ pub struct RateLimiters { } impl RateLimiters { - /// Creates a new instance of `RateLimiters` with normal and enhanced buckets. + /// Creates a new instance of `RateLimiters` /// - /// # Panics - /// Panics if the hardcoded rate limit constants are invalid (e.g., zero). + /// Panics if rate limit constants above are invalid. #[must_use] pub fn new() -> Self { Self { @@ -71,8 +90,9 @@ impl Default for RateLimiters { /// Internal helper to build a DashMap-backed rate limiter. #[allow(clippy::expect_used)] fn build_limiter(per_second: u32, burst_size: u32) -> KeyedLimiter { - let per_second = NonZeroU32::new(per_second).expect("RATE_LIMITER_PER_SECOND must be > 0"); - let burst_size = NonZeroU32::new(burst_size).expect("RATE_LIMITER_BURST_SIZE must be > 0"); + // Safety: The constants are validated at compile time by the assertions above, so these unwraps will never panic. + let per_second = NonZeroU32::new(per_second).expect("RATE_LIMITER_PER_SECOND must be positive"); + let burst_size = NonZeroU32::new(burst_size).expect("RATE_LIMITER_BURST_SIZE must be positive"); let quota = Quota::per_second(per_second).allow_burst(burst_size); RateLimiter::, DefaultClock, NoOpMiddleware>::dashmap( diff --git a/src/routes/api/v1/ytdlp_routes.rs b/src/routes/api/v1/ytdlp_routes.rs index 2021ddf..3f74059 100644 --- a/src/routes/api/v1/ytdlp_routes.rs +++ b/src/routes/api/v1/ytdlp_routes.rs @@ -5,7 +5,7 @@ use axum::{ use crate::{ controllers::api::v1::ytdlp_controller, - middleware::{auth::require_api_key, captcha::verify_captcha_token}, + middleware::{api_key::require_api_key, captcha::verify_captcha_token}, state::AppState, }; diff --git a/tests/api/auth_tests.rs b/tests/api/auth_tests.rs index b460ecf..1022192 100644 --- a/tests/api/auth_tests.rs +++ b/tests/api/auth_tests.rs @@ -1,7 +1,7 @@ use axum::http::StatusCode; use crate::common::{ - API_KEY_HEADER, TEST_MASTER_API_KEY, create_test_app, get_with_headers, send_json, + HEADER_API_KEY, TEST_MASTER_API_KEY, create_test_app, get_with_headers, send_json, }; #[tokio::test] @@ -22,7 +22,7 @@ async fn list_jobs_requires_api_key_when_invalid() { &app, get_with_headers( "/api/v1/ytdlp/jobs", - &[(API_KEY_HEADER, "not_the_master_key")], + &[(HEADER_API_KEY, "not_the_master_key")], ), ) .await; @@ -39,7 +39,7 @@ async fn list_jobs_returns_array_with_valid_api_key() { &app, get_with_headers( "/api/v1/ytdlp/jobs", - &[(API_KEY_HEADER, TEST_MASTER_API_KEY)], + &[(HEADER_API_KEY, TEST_MASTER_API_KEY)], ), ) .await; diff --git a/tests/api/captcha_tests.rs b/tests/api/captcha_tests.rs index 2c16abf..96fe363 100644 --- a/tests/api/captcha_tests.rs +++ b/tests/api/captcha_tests.rs @@ -1,7 +1,7 @@ use axum::http::StatusCode; use crate::common::{ - API_KEY_HEADER, SAMPLE_YTDLP_URL, TEST_MASTER_API_KEY, create_test_app, post_json_with_headers, + HEADER_API_KEY, SAMPLE_YTDLP_URL, TEST_MASTER_API_KEY, create_test_app, post_json_with_headers, send_json, ytdlp_enqueue_request, }; @@ -14,7 +14,7 @@ async fn valid_api_key_bypasses_captcha_and_secret_requirement() { post_json_with_headers( "/api/v1/ytdlp", &ytdlp_enqueue_request(SAMPLE_YTDLP_URL), - &[(API_KEY_HEADER, TEST_MASTER_API_KEY)], + &[(HEADER_API_KEY, TEST_MASTER_API_KEY)], ), ) .await; @@ -32,7 +32,7 @@ async fn invalid_api_key_does_not_bypass_captcha() { post_json_with_headers( "/api/v1/ytdlp", &ytdlp_enqueue_request(SAMPLE_YTDLP_URL), - &[(API_KEY_HEADER, "bad_key")], + &[(HEADER_API_KEY, "bad_key")], ), ) .await; diff --git a/tests/api/common.rs b/tests/api/common.rs index c8fac3a..8ee6fe1 100644 --- a/tests/api/common.rs +++ b/tests/api/common.rs @@ -20,7 +20,7 @@ use serde_json::{Value, json}; use std::sync::Arc; use tower::ServiceExt; -pub const API_KEY_HEADER: &str = "x-api-key"; +pub const HEADER_API_KEY: &str = "x-api-key"; pub const CAPTCHA_TOKEN_HEADER: &str = "x-captcha-token"; pub const CONTENT_TYPE_JSON: &str = "application/json"; pub const TEST_MASTER_API_KEY: &str = "test_master_key"; diff --git a/tests/api/cors_tests.rs b/tests/api/cors_tests.rs index 9557aa0..f965bec 100644 --- a/tests/api/cors_tests.rs +++ b/tests/api/cors_tests.rs @@ -54,6 +54,11 @@ async fn cors_preflight_allows_required_custom_headers() { .to_ascii_uppercase(); assert!(allow_methods.contains("GET")); assert!(allow_methods.contains("POST")); + assert!(!allow_methods.contains("PUT"), "PUT should be restricted"); + assert!( + !allow_methods.contains("DELETE"), + "DELETE should be restricted" + ); let allow_credentials = response .headers() diff --git a/tests/api/rate_limit_tests.rs b/tests/api/rate_limit_tests.rs index e7292d8..ccf404e 100644 --- a/tests/api/rate_limit_tests.rs +++ b/tests/api/rate_limit_tests.rs @@ -2,7 +2,7 @@ use axum::http::StatusCode; use serde_json::Value; use crate::common::{ - API_KEY_HEADER, EXPECTED_ROOT_MESSAGE, TEST_MASTER_API_KEY, create_test_app_with_rate_limit, + EXPECTED_ROOT_MESSAGE, HEADER_API_KEY, TEST_MASTER_API_KEY, create_test_app_with_rate_limit, get_with_headers, send_json, }; @@ -39,7 +39,7 @@ async fn enhanced_tier_allows_higher_burst_with_valid_api_key() { for _ in 0..40 { let (status, body) = send_json( &app, - get_with_headers("/", &[(API_KEY_HEADER, TEST_MASTER_API_KEY)]), + get_with_headers("/", &[(HEADER_API_KEY, TEST_MASTER_API_KEY)]), ) .await; diff --git a/tests/api/ytdlp_tests.rs b/tests/api/ytdlp_tests.rs index 980928a..885fcb3 100644 --- a/tests/api/ytdlp_tests.rs +++ b/tests/api/ytdlp_tests.rs @@ -3,7 +3,7 @@ use nadzu::routes::create_router; use serde_json::json; use crate::common::{ - API_KEY_HEADER, CAPTCHA_TOKEN_HEADER, SAMPLE_YTDLP_URL, TEST_MASTER_API_KEY, create_test_app, + CAPTCHA_TOKEN_HEADER, HEADER_API_KEY, SAMPLE_YTDLP_URL, TEST_MASTER_API_KEY, create_test_app, create_test_state, get, get_with_headers, post_json, post_json_with_headers, seed_ytdlp_job, send_json, ytdlp_enqueue_request, }; @@ -67,7 +67,7 @@ async fn ytdlp_list_jobs_returns_array() { &app, get_with_headers( "/api/v1/ytdlp/jobs", - &[(API_KEY_HEADER, TEST_MASTER_API_KEY)], + &[(HEADER_API_KEY, TEST_MASTER_API_KEY)], ), ) .await; @@ -150,7 +150,7 @@ async fn ytdlp_enqueue_rejects_invalid_url_payload() { "quality": "best", "format": "mp4" }), - &[(API_KEY_HEADER, TEST_MASTER_API_KEY)], + &[(HEADER_API_KEY, TEST_MASTER_API_KEY)], ), ) .await; diff --git a/tests/layer_unit_tests.rs b/tests/layer_unit_tests.rs index 9412a5e..eea3c47 100644 --- a/tests/layer_unit_tests.rs +++ b/tests/layer_unit_tests.rs @@ -2,7 +2,7 @@ use axum::http::HeaderMap; use nadzu::{ config::AppConfig, - middleware::{X_API_KEY, api_key::has_valid_master_api_key, rate_limit::is_production}, + middleware::{HEADER_API_KEY, api_key::has_valid_master_api_key, rate_limit::is_production}, models::health::Health, }; @@ -31,7 +31,7 @@ fn test_config(env: &str) -> AppConfig { fn has_valid_master_api_key_returns_true_for_matching_header() { let config = test_config("test"); let mut headers = HeaderMap::new(); - headers.insert(X_API_KEY, "master_key".parse().unwrap()); + headers.insert(HEADER_API_KEY, "master_key".parse().unwrap()); assert!(has_valid_master_api_key(&headers, &config)); } @@ -45,7 +45,7 @@ fn has_valid_master_api_key_returns_false_for_missing_or_wrong_header() { assert!(!has_valid_master_api_key(&empty_headers, &config)); let mut wrong_headers = HeaderMap::new(); - wrong_headers.insert(X_API_KEY, "wrong_key".parse().unwrap()); + wrong_headers.insert(HEADER_API_KEY, "wrong_key".parse().unwrap()); assert!(!has_valid_master_api_key(&wrong_headers, &config)); } From 593b2e4e13ce4d47a51ced16bc8253aff9fbba32 Mon Sep 17 00:00:00 2001 From: Nadu_Dev Date: Wed, 6 May 2026 07:01:25 +0530 Subject: [PATCH 7/7] fix: correct validation message for user name length in ValidateUserRequest --- TODO.md | 124 --------------------------------------- src/models/validation.rs | 2 +- 2 files changed, 1 insertion(+), 125 deletions(-) delete mode 100644 TODO.md diff --git a/TODO.md b/TODO.md deleted file mode 100644 index ad0ce7b..0000000 --- a/TODO.md +++ /dev/null @@ -1,124 +0,0 @@ -# Architectural Improvement Plan -> Based on design review of `nxdun/rust-codebase`. Priority ordered. Each task has a WHY. - ---- - -## TASK 1 — Introduce a Bounded Download Job Queue -**Priority: CRITICAL** -**Files affected:** `src/services/ytdlp/`, `src/controllers/` (ytdlp), `src/state.rs` - -### What to do -- Create a `JobQueue` struct wrapping a `tokio::sync::Semaphore` with a configurable `MAX_CONCURRENT_DOWNLOADS` env cap (e.g. 3). -- Assign each download request a `job_id` (UUID) on submission. -- Store job state (`Pending | Running | Done | Failed`) in Redis with a TTL of ~1 hour. -- The controller returns `202 Accepted` + `{ job_id }` immediately instead of blocking. -- Add a `GET /api/v1/ytdlp/job/:job_id` polling endpoint that reads status from Redis. - -### Why -Right now `tokio::process::Command` is spawned directly per request with no concurrency cap. Two concurrent users = two yt-dlp processes. Ten users = ten processes. This is a fork bomb. The app will OOM or the OS will kill processes randomly. The `202 + poll` pattern is the industry standard for long-running media jobs (YouTube itself uses it). This is the single highest-risk issue in the current design. - ---- - -## TASK 2 — Break AppState into Scoped Injection -**Priority: HIGH** -**Files affected:** `src/state.rs`, `src/app.rs`, all `src/controllers/`, all `src/services/` - -### What to do -- Split `AppState` into two purpose-scoped structs: - - `InfraState` — holds `PgPool`, `RedisPool`, `http_client`, `config` - - `AppState` — holds `ytdlp_manager`, `rate_limiters`, `contributions_service` (business layer) -- Inject `InfraState` directly into services via constructor, not through AppState pass-through. -- Services should not need to reach through AppState to get a DB pool. - -### Why -The current `AppState` in [`src/state.rs`](https://github.com/nxdun/rust-codebase/blob/554569cd0754247fa9020c8745174ef5087e4628/src/state.rs) mixes infrastructure handles (`http_client`, `config`) with business service instances (`ytdlp_manager`, `contributions_service`). This means every unit test for a service must construct the entire AppState including all infra. Splitting them makes services independently testable and removes the implicit coupling between unrelated concerns. - ---- - -## TASK 3 — Add Signed URL File Serving -**Priority: HIGH** -**Files affected:** Caddyfile, new `src/controllers/files.rs`, `src/routes/` - -### What to do -- Remove the direct Caddy volume serve on `/nadun/fs/*` (IP-only gating). -- Add a `POST /api/v1/ytdlp/job/:job_id/download-link` endpoint in Rust that: - - Verifies job is `Done` - - Generates a signed token: `HMAC-SHA256(job_id + expiry_timestamp, SECRET_KEY)` - - Returns `{ url: "/files/:token", expires_at }` -- Add a `GET /files/:token` route that validates the token and streams the file from the volume. - -### Why -The current design serves raw files from block storage gated only by a Caddy IP check. If Cloudflare IP ranges change, or the Caddyfile is misconfigured once, the entire `/downloads` volume becomes publicly browsable. Signed short-lived URLs are the standard (S3 pre-signed URLs work this way). This protects other users' downloaded files from being enumerated. - ---- - -## TASK 4 — Add Explicit Concurrency Annotations to Diagrams -**Priority: MEDIUM** -**Files affected:** `README.md` / diagram source - -### What to do -- On the Core System diagram, add a `JobQueue [Semaphore, max=N]` node between `YtCtrl` and `YtSvc`. -- On the Infrastructure diagram, annotate the Volume serve path with `[signed token required]`. -- Add a note on the Droplet subgraph: `Single-node, no HA — intentional for portfolio scope`. - -### Why -Right now the diagrams imply things that aren't true (unlimited worker concurrency, unguarded file access). A reviewer — recruiter, senior engineer, or tech lead — will spot the missing pieces. Annotating known limitations intentionally signals maturity. "I know this is single-node and here's why" is infinitely better than leaving them to wonder if you missed it. - ---- - -## TASK 5 — Add `src/services/ytdlp/queue.rs` Module -**Priority: MEDIUM (enables Task 1)** -**Files affected:** `src/services/ytdlp/` (new file) - -### What to do -Create `src/services/ytdlp/queue.rs` with: -```rust -pub struct DownloadJob { - pub id: Uuid, - pub url: String, - pub status: JobStatus, - pub created_at: DateTime, -} - -pub enum JobStatus { - Pending, - Running, - Done { output_path: PathBuf }, - Failed { reason: String }, -} - -pub struct JobQueue { - semaphore: Arc, - redis: Arc, -} -``` - -### Why -The `src/services/ytdlp/` directory currently exists but has no queue abstraction. This is the concrete module that implements Task 1. Without a typed `JobStatus` enum, job state gets stored as raw strings in Redis — a source of bugs when reading back state. - ---- - -## TASK 6 — Write Integration Tests for Download Flow -**Priority: LOW (but visible to recruiters)** -**Files affected:** `tests/` - -### What to do -- Add `tests/ytdlp_integration.rs` that mocks `tokio::process::Command` output. -- Test: submit job → get `202` + `job_id` → poll until `Done` → verify file token endpoint returns valid URL. -- Use `wiremock` or `httpmock` for external service mocking. - -### Why -The `tests/` directory exists but is currently sparse. For a portfolio backend, integration tests are the difference between "claims it works" and "proves it works". This is the first thing a technical reviewer will check after reading the README. - ---- - -## Summary Table - -| Task | Impact | Effort | Do First? | -|------|--------|--------|-----------| -| 1 — Job Queue | Prevents OOM / production crash | Medium | YES | -| 2 — Split AppState | Testability, clean architecture | Low | YES | -| 3 — Signed URLs | Security, file privacy | Medium | YES | -| 4 — Diagram annotations | Recruiter impression | Trivial | Quick win | -| 5 — `queue.rs` module | Enables Task 1 | Low | With Task 1 | -| 6 — Integration tests | Portfolio credibility | Medium | After 1-3 | \ No newline at end of file diff --git a/src/models/validation.rs b/src/models/validation.rs index 2d8b0df..4d5c54e 100644 --- a/src/models/validation.rs +++ b/src/models/validation.rs @@ -8,7 +8,7 @@ pub struct ValidateUserRequest { #[validate(length( min = 3, max = 20, - message = "Name must be between 3 and 50 characters" + message = "Name must be between 3 and 20 characters" ))] pub name: String,