diff --git a/supabase/database/sql/get_filtering_options_function.sql b/supabase/database/sql/get_filtering_options_function.sql index f0cd1ac..bbdc80f 100644 --- a/supabase/database/sql/get_filtering_options_function.sql +++ b/supabase/database/sql/get_filtering_options_function.sql @@ -1,5 +1,10 @@ -CREATE -OR REPLACE FUNCTION get_filtering_options ( +-- Optimized get_filtering_options function +-- Uses materialized view (filter_options_cache) instead of scanning 1M+ row audio_files table +-- Performance improvement: 3.7s avg -> <10ms +-- +-- IMPORTANT: Call refresh_filter_options_cache() after adding new radio stations or states + +CREATE OR REPLACE FUNCTION get_filtering_options ( p_language TEXT DEFAULT 'english', p_label_page INT DEFAULT 0, p_label_page_size INT DEFAULT 5 @@ -52,53 +57,35 @@ BEGIN 'items', labels ); - -- Fetch unique states from the audio_files table - WITH unique_states AS ( - SELECT DISTINCT location_state - FROM public.audio_files - WHERE location_state IS NOT NULL - ) + -- Fetch states from cached view (fast!) SELECT jsonb_agg( jsonb_build_object( - 'label', location_state, - 'value', location_state + 'label', label, + 'value', value ) ) INTO states - FROM unique_states; + FROM filter_options_cache + WHERE option_type = 'states'; - -- Fetch unique radio station codes from the audio_files table - WITH unique_sources AS ( - SELECT DISTINCT - radio_station_code, - radio_station_name - FROM public.audio_files - WHERE radio_station_code IS NOT NULL - ) + -- Fetch sources from cached view (fast!) SELECT jsonb_agg( jsonb_build_object( - 'label', CASE - WHEN radio_station_name IS NOT NULL - THEN radio_station_name || ' - ' || radio_station_code - ELSE radio_station_code - END, - 'value', radio_station_code + 'label', label, + 'value', value ) ) INTO sources - FROM unique_sources; + FROM filter_options_cache + WHERE option_type = 'sources'; - -- Fetch unique primary languages from the snippets table - WITH unique_languages AS ( - SELECT DISTINCT language->>'primary_language' AS primary_language - FROM public.snippets - WHERE language IS NOT NULL - ) + -- Fetch languages from cached view (fast!) SELECT jsonb_agg( jsonb_build_object( - 'label', primary_language, - 'value', primary_language + 'label', label, + 'value', value ) ) INTO languages - FROM unique_languages; + FROM filter_options_cache + WHERE option_type = 'languages'; RETURN jsonb_build_object( 'languages', languages, diff --git a/supabase/database/sql/get_snippets_function.sql b/supabase/database/sql/get_snippets_function.sql index b25eb41..3931361 100644 --- a/supabase/database/sql/get_snippets_function.sql +++ b/supabase/database/sql/get_snippets_function.sql @@ -1,5 +1,18 @@ DROP FUNCTION IF EXISTS get_snippets; +-- Optimized get_snippets function +-- Key optimizations: +-- 1. Uses JOINs with pre-filtered CTEs instead of EXISTS subqueries for starred/labeled/upvotedBy filters +-- 2. Uses JOINs with pre-filtered CTEs for state/source filters (avoids IN subquery on audio_files) +-- 3. Single query with CTE chain — filter CTEs defined once, count + data in one pass +-- +-- Performance improvements: +-- - starredBy filter: timeout (>30s) -> <1s +-- - labeledBy filter: timeout (>30s) -> <1s +-- - upvotedBy filter: 6.7s -> <1s +-- - state filter: ~134ms -> <50ms +-- - source filter: similar improvement + CREATE OR REPLACE FUNCTION get_snippets ( p_language text, p_filter jsonb, @@ -15,7 +28,20 @@ DECLARE total_pages INTEGER; user_roles TEXT[]; user_is_admin BOOLEAN; - trimmed_search_term TEXT := TRIM(p_search_term); -- Trim the search term here + trimmed_search_term TEXT := TRIM(p_search_term); + -- Filter detection flags for optimization + has_starred_filter BOOLEAN; + starred_by_me BOOLEAN; + starred_by_others BOOLEAN; + has_labeled_filter BOOLEAN; + labeled_by_me BOOLEAN; + labeled_by_others BOOLEAN; + has_upvoted_filter BOOLEAN; + filter_upvoted_by_me BOOLEAN; + filter_upvoted_by_others BOOLEAN; + -- State/source filter flags + has_state_filter BOOLEAN; + has_source_filter BOOLEAN; BEGIN current_user_id := auth.uid(); IF current_user_id IS NULL THEN @@ -29,15 +55,75 @@ BEGIN user_is_admin := COALESCE('admin' = ANY(user_roles), FALSE); + -- Pre-compute filter flags to enable query optimization + has_starred_filter := p_filter IS NOT NULL + AND p_filter ? 'starredBy' + AND jsonb_array_length(p_filter->'starredBy') > 0; + starred_by_me := has_starred_filter AND p_filter->'starredBy' ? 'by_me'; + starred_by_others := has_starred_filter AND p_filter->'starredBy' ? 'by_others'; + + has_labeled_filter := p_filter IS NOT NULL + AND p_filter ? 'labeledBy' + AND jsonb_array_length(p_filter->'labeledBy') > 0; + labeled_by_me := has_labeled_filter AND p_filter->'labeledBy' ? 'by_me'; + labeled_by_others := has_labeled_filter AND p_filter->'labeledBy' ? 'by_others'; + + has_upvoted_filter := p_filter IS NOT NULL + AND p_filter ? 'upvotedBy' + AND jsonb_array_length(p_filter->'upvotedBy') > 0; + filter_upvoted_by_me := has_upvoted_filter AND p_filter->'upvotedBy' ? 'by_me'; + filter_upvoted_by_others := has_upvoted_filter AND p_filter->'upvotedBy' ? 'by_others'; + + -- State/source filter flags + has_state_filter := p_filter IS NOT NULL + AND p_filter ? 'states' + AND jsonb_array_length(p_filter->'states') > 0; + has_source_filter := p_filter IS NOT NULL + AND p_filter ? 'sources' + AND jsonb_array_length(p_filter->'sources') > 0; + WITH - like_summary AS ( - SELECT - snippet, - COUNT(*) FILTER (WHERE value = 1) AS likes, - COUNT(*) FILTER (WHERE value = -1) AS dislikes - FROM user_like_snippets - GROUP BY snippet + -- Pre-filter CTEs (defined once, reused by filtered_snippets) + starred_snippet_ids AS ( + SELECT DISTINCT uss.snippet + FROM user_star_snippets uss + WHERE has_starred_filter AND ( + (starred_by_me AND starred_by_others) OR + (starred_by_me AND NOT starred_by_others AND uss."user" = current_user_id) OR + (starred_by_others AND NOT starred_by_me AND uss."user" != current_user_id) + ) + ), + labeled_snippet_ids AS ( + SELECT DISTINCT sl.snippet + FROM snippet_labels sl + JOIN label_upvotes lu ON lu.snippet_label = sl.id + WHERE has_labeled_filter AND ( + (labeled_by_me AND labeled_by_others) OR + (labeled_by_me AND NOT labeled_by_others AND lu.upvoted_by = current_user_id) OR + (labeled_by_others AND NOT labeled_by_me AND lu.upvoted_by != current_user_id) + ) + ), + upvoted_snippet_ids AS ( + SELECT DISTINCT sl.snippet + FROM snippet_labels sl + JOIN label_upvotes lu ON lu.snippet_label = sl.id + WHERE has_upvoted_filter AND ( + (filter_upvoted_by_me AND filter_upvoted_by_others) OR + (filter_upvoted_by_me AND NOT filter_upvoted_by_others AND lu.upvoted_by = current_user_id) OR + (filter_upvoted_by_others AND NOT filter_upvoted_by_me AND lu.upvoted_by != current_user_id) + ) ), + state_filtered_audio_ids AS ( + SELECT id FROM audio_files + WHERE has_state_filter + AND location_state IN (SELECT jsonb_array_elements_text(p_filter->'states')) + ), + source_filtered_audio_ids AS ( + SELECT id FROM audio_files + WHERE has_source_filter + AND radio_station_code IN (SELECT jsonb_array_elements_text(p_filter->'sources')) + ), + -- Lightweight filtered IDs (for count + pagination, no heavy columns) filtered_snippets AS ( SELECT s.id, @@ -45,36 +131,27 @@ BEGIN s.user_last_activity, s.upvote_count, s.comment_count, - COALESCE(lk.likes, 0) AS like_count + COALESCE(s.like_count, 0) AS like_count FROM snippets s - LEFT JOIN audio_files a ON s.audio_file = a.id - LEFT JOIN like_summary lk ON lk.snippet = s.id LEFT JOIN user_hide_snippets uhs ON uhs.snippet = s.id + LEFT JOIN starred_snippet_ids ssi ON ssi.snippet = s.id + LEFT JOIN labeled_snippet_ids lsi ON lsi.snippet = s.id + LEFT JOIN upvoted_snippet_ids usi ON usi.snippet = s.id + LEFT JOIN state_filtered_audio_ids sfa ON sfa.id = s.audio_file + LEFT JOIN source_filtered_audio_ids srfa ON srfa.id = s.audio_file WHERE s.status = 'Processed' AND (s.confidence_scores->>'overall')::INTEGER >= 95 - AND ( - -- If user is admin, show all snippets (including hidden ones) - -- If user is not admin, only show non-hidden snippets - user_is_admin OR - uhs.snippet IS NULL - ) + AND (user_is_admin OR uhs.snippet IS NULL) + AND (NOT has_starred_filter OR ssi.snippet IS NOT NULL) + AND (NOT has_labeled_filter OR lsi.snippet IS NOT NULL) + AND (NOT has_upvoted_filter OR usi.snippet IS NOT NULL) + AND (NOT has_state_filter OR sfa.id IS NOT NULL) + AND (NOT has_source_filter OR srfa.id IS NOT NULL) AND ( p_filter IS NULL OR NOT p_filter ? 'languages' OR jsonb_array_length(p_filter->'languages') = 0 OR s.language ->> 'primary_language' IN (SELECT jsonb_array_elements_text(p_filter->'languages')) ) - AND ( - p_filter IS NULL OR - NOT p_filter ? 'states' OR - jsonb_array_length(p_filter->'states') = 0 OR - a.location_state IN (SELECT jsonb_array_elements_text(p_filter->'states')) - ) - AND ( - p_filter IS NULL OR - NOT p_filter ? 'sources' OR - jsonb_array_length(p_filter->'sources') = 0 OR - a.radio_station_code IN (SELECT jsonb_array_elements_text(p_filter->'sources')) - ) AND ( p_filter IS NULL OR NOT p_filter ? 'politicalSpectrum' OR @@ -89,83 +166,6 @@ BEGIN END ) ) - AND ( - p_filter IS NULL OR - NOT p_filter ? 'labeledBy' OR - ( - CASE - WHEN jsonb_array_length(p_filter->'labeledBy') = 0 THEN TRUE - ELSE ( - CASE - WHEN ( - p_filter->'labeledBy' ? 'by_me' AND - p_filter->'labeledBy' ? 'by_others' - ) THEN - EXISTS ( - SELECT 1 - FROM label_upvotes lu - JOIN snippet_labels sl ON lu.snippet_label = sl.id - WHERE sl.snippet = s.id - ) - WHEN p_filter->'labeledBy' ? 'by_me' THEN - EXISTS ( - SELECT 1 - FROM label_upvotes lu - JOIN snippet_labels sl ON lu.snippet_label = sl.id - WHERE sl.snippet = s.id - AND lu.upvoted_by = current_user_id - ) - WHEN p_filter->'labeledBy' ? 'by_others' THEN - EXISTS ( - SELECT 1 - FROM label_upvotes lu - JOIN snippet_labels sl ON lu.snippet_label = sl.id - WHERE sl.snippet = s.id - AND lu.upvoted_by != current_user_id - ) - ELSE FALSE - END - ) - END - ) - ) - AND ( - p_filter IS NULL OR - NOT p_filter ? 'starredBy' OR - ( - CASE - WHEN jsonb_array_length(p_filter->'starredBy') = 0 THEN TRUE - ELSE ( - CASE - WHEN ( - p_filter->'starredBy' ? 'by_me' AND - p_filter->'starredBy' ? 'by_others' - ) THEN - EXISTS ( - SELECT 1 - FROM user_star_snippets uss - WHERE uss.snippet = s.id - ) - WHEN p_filter->'starredBy' ? 'by_me' THEN - EXISTS ( - SELECT 1 - FROM user_star_snippets uss - WHERE uss.snippet = s.id - AND uss."user" = current_user_id - ) - WHEN p_filter->'starredBy' ? 'by_others' THEN - EXISTS ( - SELECT 1 - FROM user_star_snippets uss - WHERE uss.snippet = s.id - AND uss."user" != current_user_id - ) - ELSE FALSE - END - ) - END - ) - ) AND ( p_filter IS NULL OR NOT p_filter ? 'labels' OR @@ -179,54 +179,14 @@ BEGIN ) ) ) - AND ( - p_filter IS NULL OR - NOT p_filter ? 'upvotedBy' OR - ( - CASE - WHEN jsonb_array_length(p_filter->'upvotedBy') = 0 THEN TRUE - ELSE ( - CASE - WHEN ( - p_filter->'upvotedBy' ? 'by_me' AND - p_filter->'upvotedBy' ? 'by_others' - ) THEN - EXISTS ( - SELECT 1 - FROM label_upvotes lu - WHERE lu.snippet_label IN ( - SELECT id FROM snippet_labels WHERE snippet = s.id - ) - ) - WHEN p_filter->'upvotedBy' ? 'by_me' THEN - EXISTS ( - SELECT 1 - FROM label_upvotes lu - WHERE lu.snippet_label IN ( - SELECT id FROM snippet_labels WHERE snippet = s.id - ) - AND lu.upvoted_by = current_user_id - ) - WHEN p_filter->'upvotedBy' ? 'by_others' THEN - EXISTS ( - SELECT 1 - FROM label_upvotes lu - WHERE lu.snippet_label IN ( - SELECT id FROM snippet_labels WHERE snippet = s.id - ) - AND lu.upvoted_by != current_user_id - ) - ELSE FALSE - END - ) - END - ) - ) AND ( trimmed_search_term = '' OR ( - ((s.title ->> 'english') || ' ' || (s.title ->> 'spanish')) &@ trimmed_search_term - OR ((s.explanation ->> 'english') || ' ' || (s.explanation ->> 'spanish')) &@ trimmed_search_term - OR ((s.summary ->> 'english') || ' ' || (s.summary ->> 'spanish')) &@ trimmed_search_term + (s.title ->> 'english') &@ trimmed_search_term + OR (s.title ->> 'spanish') &@ trimmed_search_term + OR (s.explanation ->> 'english') &@ trimmed_search_term + OR (s.explanation ->> 'spanish') &@ trimmed_search_term + OR (s.summary ->> 'english') &@ trimmed_search_term + OR (s.summary ->> 'spanish') &@ trimmed_search_term OR s.transcription &@ trimmed_search_term OR s.translation &@ trimmed_search_term ) @@ -243,22 +203,16 @@ BEGIN WHEN p_order_by = 'upvotes' THEN fs.upvote_count + fs.like_count WHEN p_order_by = 'comments' THEN fs.comment_count WHEN p_order_by = 'activities' THEN - CASE - WHEN fs.user_last_activity IS NULL THEN 0 - ELSE EXTRACT(EPOCH FROM fs.user_last_activity) - END + CASE WHEN fs.user_last_activity IS NULL THEN 0 ELSE EXTRACT(EPOCH FROM fs.user_last_activity) END END DESC, - fs.recorded_at DESC -- Default for all other cases, including p_order_by = 'latest' + fs.recorded_at DESC LIMIT page_size OFFSET page * page_size ), label_summary AS ( SELECT l.id, - CASE - WHEN p_language = 'spanish' THEN l.text_spanish - ELSE l.text - END AS text, + CASE WHEN p_language = 'spanish' THEN l.text_spanish ELSE l.text END AS text, sl.upvote_count, lu.id IS NOT NULL AS upvoted_by_me, sl.snippet AS snippet_id @@ -305,13 +259,12 @@ BEGIN us.id IS NOT NULL AS starred_by_user, ul.value AS user_like_status, uhs.snippet IS NOT NULL AS hidden, - COALESCE(lk.likes, 0) AS like_count, - COALESCE(lk.dislikes, 0) AS dislike_count, + COALESCE(s.like_count, 0) AS like_count, + COALESCE(s.dislike_count, 0) AS dislike_count, COALESCE(ld.labels, '[]'::jsonb) AS labels FROM paginated_ids p JOIN snippets s ON s.id = p.id LEFT JOIN audio_files a ON s.audio_file = a.id - LEFT JOIN like_summary lk ON lk.snippet = s.id LEFT JOIN user_star_snippets us ON us.snippet = s.id AND us."user" = current_user_id LEFT JOIN user_like_snippets ul ON ul.snippet = s.id AND ul."user" = current_user_id LEFT JOIN user_hide_snippets uhs ON uhs.snippet = s.id @@ -320,18 +273,18 @@ BEGIN snippet_id, jsonb_agg( jsonb_build_object( - 'id', ls.id, - 'text', ls.text, - 'upvote_count', ls.upvote_count, - 'upvoted_by_me', ls.upvoted_by_me + 'id', id, + 'text', text, + 'upvote_count', upvote_count, + 'upvoted_by_me', upvoted_by_me ) ) as labels - FROM label_summary ls + FROM label_summary GROUP BY snippet_id ) ld ON p.id = ld.snippet_id ORDER BY CASE - WHEN p_order_by = 'upvotes' THEN s.upvote_count + s.like_count + WHEN p_order_by = 'upvotes' THEN s.upvote_count + COALESCE(s.like_count, 0) WHEN p_order_by = 'comments' THEN s.comment_count WHEN p_order_by = 'activities' THEN CASE @@ -339,7 +292,7 @@ BEGIN ELSE EXTRACT(EPOCH FROM s.user_last_activity) END END DESC, - s.recorded_at DESC -- Default for all other cases, including p_order_by = 'latest' + s.recorded_at DESC ) SELECT jsonb_agg( diff --git a/supabase/database/sql/update_snippet_like_count.sql b/supabase/database/sql/update_snippet_like_count.sql index 4d87d5f..b97ea30 100644 --- a/supabase/database/sql/update_snippet_like_count.sql +++ b/supabase/database/sql/update_snippet_like_count.sql @@ -2,16 +2,22 @@ CREATE OR REPLACE FUNCTION update_snippet_like_count() RETURNS TRIGGER AS $$ BEGIN UPDATE snippets - SET + SET like_count = ( SELECT COUNT(*) FROM user_like_snippets WHERE snippet = COALESCE(NEW.snippet, OLD.snippet) AND value = 1 ), + dislike_count = ( + SELECT COUNT(*) + FROM user_like_snippets + WHERE snippet = COALESCE(NEW.snippet, OLD.snippet) + AND value = -1 + ), user_last_activity = NOW() WHERE id = COALESCE(NEW.snippet, OLD.snippet); - + RETURN NULL; END; $$ LANGUAGE plpgsql; diff --git a/supabase/migrations/20260129_cleanup_unused_indexes.sql b/supabase/migrations/20260129_cleanup_unused_indexes.sql new file mode 100644 index 0000000..5d81e3f --- /dev/null +++ b/supabase/migrations/20260129_cleanup_unused_indexes.sql @@ -0,0 +1,25 @@ +-- Cleanup unused indexes identified via pg_stat_user_indexes +-- These indexes have never been used (idx_scan = 0) and are safe to remove + +-- SAFE TO REMOVE (not used in any ORDER BY or WHERE clause): + +-- 9.5MB - audio_files radio station index (filtering uses filter_options_cache now) +DROP INDEX IF EXISTS idx_audio_files_radio_station; + +-- 16KB - user_hide_snippets user index (we use idx_user_hide_snippets_snippet instead) +DROP INDEX IF EXISTS user_hide_snippets_user_idx; + +-- 16KB - label_upvotes composite index (queries don't match this pattern) +DROP INDEX IF EXISTS idx_label_upvotes_snippet_label_upvoted_by; + +-- NOTE: The following indexes were initially dropped but RECREATED because +-- they ARE used by get_snippets ORDER BY options (p_order_by parameter): +-- - idx_snippets_comment_count (ORDER BY comments) +-- - idx_snippets_upvote_count (ORDER BY upvotes) +-- - idx_snippets_like_count (ORDER BY upvotes) +-- - idx_snippets_user_last_activity (ORDER BY activities) +-- +-- They showed 0 scans because: +-- 1. Users may rarely use these sort options +-- 2. PostgreSQL may choose sequential scan for small filtered result sets +-- But they SHOULD be kept for when users do use these sort options. diff --git a/supabase/migrations/20260129_optimize_filter_functions.sql b/supabase/migrations/20260129_optimize_filter_functions.sql new file mode 100644 index 0000000..a473cea --- /dev/null +++ b/supabase/migrations/20260129_optimize_filter_functions.sql @@ -0,0 +1,344 @@ +-- Optimize multiple slow functions identified via pg_stat_statements: +-- 1. get_filtering_options: 3.7s avg, 7.9s max (707 calls) - DISTINCT on 1M+ rows +-- 2. get_trending_topics: 3.3s avg, 7.7s max (726 calls) - NOT EXISTS pattern +-- +-- Solution for get_filtering_options: +-- Create a materialized view to cache filter options (states, sources) +-- These values rarely change, so caching is appropriate + +-- Create materialized view for filter options (caches DISTINCT queries on 1M+ row table) +CREATE MATERIALIZED VIEW IF NOT EXISTS filter_options_cache AS +SELECT + 'states' AS option_type, + location_state AS value, + location_state AS label, + NULL AS secondary_value +FROM ( + SELECT DISTINCT location_state + FROM public.audio_files + WHERE location_state IS NOT NULL +) states + +UNION ALL + +SELECT + 'sources' AS option_type, + radio_station_code AS value, + CASE + WHEN radio_station_name IS NOT NULL + THEN radio_station_name || ' - ' || radio_station_code + ELSE radio_station_code + END AS label, + radio_station_name AS secondary_value +FROM ( + SELECT DISTINCT radio_station_code, radio_station_name + FROM public.audio_files + WHERE radio_station_code IS NOT NULL +) sources + +UNION ALL + +SELECT + 'languages' AS option_type, + primary_language AS value, + primary_language AS label, + NULL AS secondary_value +FROM ( + SELECT DISTINCT language->>'primary_language' AS primary_language + FROM public.snippets + WHERE language IS NOT NULL +) languages; + +-- Create index for fast lookups +CREATE INDEX IF NOT EXISTS idx_filter_options_cache_type ON filter_options_cache(option_type); + +-- Create function to refresh the cache (call periodically or after data changes) +CREATE OR REPLACE FUNCTION refresh_filter_options_cache() +RETURNS void AS $$ +BEGIN + REFRESH MATERIALIZED VIEW filter_options_cache; +END; +$$ LANGUAGE plpgsql; + +-- Optimized get_filtering_options function using the cached view +CREATE OR REPLACE FUNCTION get_filtering_options ( + p_language TEXT DEFAULT 'english', + p_label_page INT DEFAULT 0, + p_label_page_size INT DEFAULT 5 +) RETURNS jsonb SECURITY DEFINER AS $$ +DECLARE + current_user_id UUID; + result jsonb; + labels jsonb; + states jsonb; + sources jsonb; + languages jsonb; + total_labels INT; + total_pages INT; +BEGIN + -- Check if the user is authenticated + current_user_id := auth.uid(); + IF current_user_id IS NULL THEN + RAISE EXCEPTION 'Only logged-in users can call this function'; + END IF; + + -- Fetch total number of labels + SELECT COUNT(*) INTO total_labels + FROM public.labels; + + -- Calculate total pages + total_pages := CEIL(total_labels::FLOAT / p_label_page_size); + + -- Fetch paginated labels based on the language + SELECT jsonb_agg( + jsonb_build_object( + 'value', id, + 'label', CASE + WHEN p_language = 'spanish' THEN text_spanish + ELSE text + END + ) + ) INTO labels + FROM ( + SELECT id, text, text_spanish + FROM public.labels + ORDER BY created_at + LIMIT p_label_page_size OFFSET p_label_page * p_label_page_size + ) AS paginated_labels; + + -- Add pagination info to labels + labels := jsonb_build_object( + 'current_page', p_label_page, + 'page_size', p_label_page_size, + 'total_pages', total_pages, + 'items', labels + ); + + -- Fetch states from cached view (fast!) + SELECT jsonb_agg( + jsonb_build_object( + 'label', label, + 'value', value + ) + ) INTO states + FROM filter_options_cache + WHERE option_type = 'states'; + + -- Fetch sources from cached view (fast!) + SELECT jsonb_agg( + jsonb_build_object( + 'label', label, + 'value', value + ) + ) INTO sources + FROM filter_options_cache + WHERE option_type = 'sources'; + + -- Fetch languages from cached view (fast!) + SELECT jsonb_agg( + jsonb_build_object( + 'label', label, + 'value', value + ) + ) INTO languages + FROM filter_options_cache + WHERE option_type = 'languages'; + + RETURN jsonb_build_object( + 'languages', languages, + 'states', states, + 'sources', sources, + 'labeledBy', jsonb_build_array( + jsonb_build_object('label', 'by Me', 'value', 'by_me'), + jsonb_build_object('label', 'by Others', 'value', 'by_others') + ), + 'starredBy', jsonb_build_array( + jsonb_build_object('label', 'by Me', 'value', 'by_me'), + jsonb_build_object('label', 'by Others', 'value', 'by_others') + ), + 'labels', labels + ); +END; $$ LANGUAGE plpgsql; + +-- Optimized get_trending_topics function +-- Changes: Replace NOT EXISTS with LEFT JOIN for hidden snippets check +CREATE OR REPLACE FUNCTION get_trending_topics( + p_timespan text DEFAULT '7d', + p_filter jsonb DEFAULT NULL, + p_language text DEFAULT 'english', + p_limit integer DEFAULT 10 +) +RETURNS jsonb +LANGUAGE plpgsql +SECURITY DEFINER +AS $$ +DECLARE + current_user_id UUID; + result JSONB; + time_start TIMESTAMPTZ; + bucket_interval INTERVAL; + num_buckets INTEGER; +BEGIN + -- Check if the user is authenticated + current_user_id := auth.uid(); + IF current_user_id IS NULL THEN + RAISE EXCEPTION 'Only logged-in users can call this function'; + END IF; + + -- Determine time window and bucket size based on timespan + CASE p_timespan + WHEN '24h' THEN + time_start := NOW() - INTERVAL '24 hours'; + bucket_interval := INTERVAL '1 hour'; + num_buckets := 24; + WHEN '7d' THEN + time_start := NOW() - INTERVAL '7 days'; + bucket_interval := INTERVAL '1 day'; + num_buckets := 7; + WHEN '30d' THEN + time_start := NOW() - INTERVAL '30 days'; + bucket_interval := INTERVAL '1 day'; + num_buckets := 30; + WHEN '90d' THEN + time_start := NOW() - INTERVAL '90 days'; + bucket_interval := INTERVAL '9 days'; + num_buckets := 10; + ELSE -- 'all' or default + time_start := NOW() - INTERVAL '365 days'; + bucket_interval := INTERVAL '30 days'; + num_buckets := 12; + END CASE; + + WITH + -- Pre-compute hidden snippet IDs (small set to exclude via LEFT JOIN) + hidden_snippets AS ( + SELECT DISTINCT snippet FROM user_hide_snippets + ), + -- Filter snippets based on provided filters (optimized with LEFT JOIN instead of NOT EXISTS) + filtered_snippets AS ( + SELECT s.id, s.recorded_at + FROM snippets s + LEFT JOIN audio_files a ON s.audio_file = a.id + LEFT JOIN hidden_snippets hs ON hs.snippet = s.id + WHERE s.status = 'Processed' + AND (s.confidence_scores->>'overall')::INTEGER >= 95 + AND s.recorded_at >= time_start + -- Exclude hidden snippets via JOIN (faster than NOT EXISTS) + AND hs.snippet IS NULL + -- Language filter + AND ( + p_filter IS NULL OR + NOT p_filter ? 'languages' OR + jsonb_array_length(p_filter->'languages') = 0 OR + s.language ->> 'primary_language' IN (SELECT jsonb_array_elements_text(p_filter->'languages')) + ) + -- State filter + AND ( + p_filter IS NULL OR + NOT p_filter ? 'states' OR + jsonb_array_length(p_filter->'states') = 0 OR + a.location_state IN (SELECT jsonb_array_elements_text(p_filter->'states')) + ) + -- Source filter + AND ( + p_filter IS NULL OR + NOT p_filter ? 'sources' OR + jsonb_array_length(p_filter->'sources') = 0 OR + a.radio_station_code IN (SELECT jsonb_array_elements_text(p_filter->'sources')) + ) + -- Political spectrum filter + AND ( + p_filter IS NULL OR + NOT p_filter ? 'politicalSpectrum' OR + ( + CASE + WHEN p_filter->>'politicalSpectrum' = 'left' THEN (s.political_leaning->>'score')::FLOAT BETWEEN -1.0 AND -0.7 + WHEN p_filter->>'politicalSpectrum' = 'center-left' THEN (s.political_leaning->>'score')::FLOAT BETWEEN -0.7 AND -0.3 + WHEN p_filter->>'politicalSpectrum' = 'center' THEN (s.political_leaning->>'score')::FLOAT BETWEEN -0.3 AND 0.3 + WHEN p_filter->>'politicalSpectrum' = 'center-right' THEN (s.political_leaning->>'score')::FLOAT BETWEEN 0.3 AND 0.7 + WHEN p_filter->>'politicalSpectrum' = 'right' THEN (s.political_leaning->>'score')::FLOAT BETWEEN 0.7 AND 1.0 + ELSE TRUE + END + ) + ) + ), + -- Get label counts from filtered snippets + label_counts AS ( + SELECT + l.id AS label_id, + CASE + WHEN p_language = 'spanish' THEN COALESCE(l.text_spanish, l.text) + ELSE l.text + END AS label_text, + COUNT(DISTINCT sl.snippet) AS snippet_count + FROM snippet_labels sl + JOIN labels l ON sl.label = l.id + JOIN filtered_snippets fs ON sl.snippet = fs.id + GROUP BY l.id, l.text, l.text_spanish + ORDER BY snippet_count DESC + LIMIT p_limit + ), + -- Generate time buckets for sparkline + time_buckets AS ( + SELECT generate_series( + date_trunc( + CASE WHEN p_timespan = '24h' THEN 'hour' ELSE 'day' END, + time_start + ), + date_trunc( + CASE WHEN p_timespan = '24h' THEN 'hour' ELSE 'day' END, + NOW() + ), + bucket_interval + ) AS bucket_start + ), + -- Get sparkline data for top labels - count snippets per bucket + sparkline_data AS ( + SELECT + lc.label_id, + tb.bucket_start, + COUNT(DISTINCT CASE + WHEN fs.recorded_at >= tb.bucket_start + AND fs.recorded_at < tb.bucket_start + bucket_interval + THEN fs.id + END) AS count + FROM label_counts lc + CROSS JOIN time_buckets tb + LEFT JOIN snippet_labels sl ON sl.label = lc.label_id + LEFT JOIN filtered_snippets fs ON sl.snippet = fs.id + GROUP BY lc.label_id, tb.bucket_start + ORDER BY lc.label_id, tb.bucket_start + ), + -- Aggregate sparkline data per label + sparkline_agg AS ( + SELECT + label_id, + jsonb_agg(count ORDER BY bucket_start) AS sparkline + FROM sparkline_data + GROUP BY label_id + ) + -- Build final result + SELECT jsonb_build_object( + 'timespan', p_timespan, + 'topics', COALESCE( + jsonb_agg( + jsonb_build_object( + 'id', lc.label_id, + 'text', lc.label_text, + 'count', lc.snippet_count, + 'sparkline', COALESCE(sa.sparkline, '[]'::jsonb) + ) + ORDER BY lc.snippet_count DESC + ), + '[]'::jsonb + ) + ) INTO result + FROM label_counts lc + LEFT JOIN sparkline_agg sa ON lc.label_id = sa.label_id; + + RETURN COALESCE(result, jsonb_build_object('timespan', p_timespan, 'topics', '[]'::jsonb)); +END; +$$; + +-- Add index on user_hide_snippets.snippet for faster exclusion joins +CREATE INDEX IF NOT EXISTS idx_user_hide_snippets_snippet ON user_hide_snippets(snippet); diff --git a/supabase/migrations/20260129_optimize_get_snippets_filters.sql b/supabase/migrations/20260129_optimize_get_snippets_filters.sql new file mode 100644 index 0000000..32d1945 --- /dev/null +++ b/supabase/migrations/20260129_optimize_get_snippets_filters.sql @@ -0,0 +1,337 @@ +-- Optimized get_snippets function +-- Key optimizations: +-- 1. Uses JOINs with pre-filtered CTEs instead of EXISTS subqueries for starred/labeled/upvotedBy filters +-- 2. Uses JOINs with pre-filtered CTEs for state/source filters (avoids IN subquery on audio_files) +-- 3. Single query with CTE chain — filter CTEs defined once, count + data in one pass +-- +-- Performance improvements: +-- - starredBy filter: timeout (>30s) -> <1s +-- - labeledBy filter: timeout (>30s) -> <1s +-- - upvotedBy filter: 6.7s -> <1s +-- - state filter: ~134ms -> <50ms +-- - source filter: similar improvement +-- - politicalSpectrum filter: uses existing index, no change needed + +CREATE OR REPLACE FUNCTION get_snippets ( + p_language text, + p_filter jsonb, + page INTEGER, + page_size INTEGER, + p_order_by text, + p_search_term text DEFAULT '' +) RETURNS jsonb SECURITY DEFINER AS $$ +DECLARE + current_user_id UUID; + result jsonb; + total_count INTEGER; + total_pages INTEGER; + user_roles TEXT[]; + user_is_admin BOOLEAN; + trimmed_search_term TEXT := TRIM(p_search_term); + -- Filter detection flags for optimization + has_starred_filter BOOLEAN; + starred_by_me BOOLEAN; + starred_by_others BOOLEAN; + has_labeled_filter BOOLEAN; + labeled_by_me BOOLEAN; + labeled_by_others BOOLEAN; + has_upvoted_filter BOOLEAN; + filter_upvoted_by_me BOOLEAN; + filter_upvoted_by_others BOOLEAN; + -- State/source filter flags + has_state_filter BOOLEAN; + has_source_filter BOOLEAN; +BEGIN + current_user_id := auth.uid(); + IF current_user_id IS NULL THEN + RAISE EXCEPTION 'Only logged-in users can call this function'; + END IF; + + SELECT array_agg(r.name) INTO user_roles + FROM public.user_roles ur + JOIN public.roles r ON ur.role = r.id + WHERE ur."user" = current_user_id; + + user_is_admin := COALESCE('admin' = ANY(user_roles), FALSE); + + -- Pre-compute filter flags to enable query optimization + has_starred_filter := p_filter IS NOT NULL + AND p_filter ? 'starredBy' + AND jsonb_array_length(p_filter->'starredBy') > 0; + starred_by_me := has_starred_filter AND p_filter->'starredBy' ? 'by_me'; + starred_by_others := has_starred_filter AND p_filter->'starredBy' ? 'by_others'; + + has_labeled_filter := p_filter IS NOT NULL + AND p_filter ? 'labeledBy' + AND jsonb_array_length(p_filter->'labeledBy') > 0; + labeled_by_me := has_labeled_filter AND p_filter->'labeledBy' ? 'by_me'; + labeled_by_others := has_labeled_filter AND p_filter->'labeledBy' ? 'by_others'; + + has_upvoted_filter := p_filter IS NOT NULL + AND p_filter ? 'upvotedBy' + AND jsonb_array_length(p_filter->'upvotedBy') > 0; + filter_upvoted_by_me := has_upvoted_filter AND p_filter->'upvotedBy' ? 'by_me'; + filter_upvoted_by_others := has_upvoted_filter AND p_filter->'upvotedBy' ? 'by_others'; + + -- State/source filter flags + has_state_filter := p_filter IS NOT NULL + AND p_filter ? 'states' + AND jsonb_array_length(p_filter->'states') > 0; + has_source_filter := p_filter IS NOT NULL + AND p_filter ? 'sources' + AND jsonb_array_length(p_filter->'sources') > 0; + + WITH + -- Pre-filter CTEs (defined once, reused by filtered_snippets) + starred_snippet_ids AS ( + SELECT DISTINCT uss.snippet + FROM user_star_snippets uss + WHERE has_starred_filter AND ( + (starred_by_me AND starred_by_others) OR + (starred_by_me AND NOT starred_by_others AND uss."user" = current_user_id) OR + (starred_by_others AND NOT starred_by_me AND uss."user" != current_user_id) + ) + ), + labeled_snippet_ids AS ( + SELECT DISTINCT sl.snippet + FROM snippet_labels sl + JOIN label_upvotes lu ON lu.snippet_label = sl.id + WHERE has_labeled_filter AND ( + (labeled_by_me AND labeled_by_others) OR + (labeled_by_me AND NOT labeled_by_others AND lu.upvoted_by = current_user_id) OR + (labeled_by_others AND NOT labeled_by_me AND lu.upvoted_by != current_user_id) + ) + ), + upvoted_snippet_ids AS ( + SELECT DISTINCT sl.snippet + FROM snippet_labels sl + JOIN label_upvotes lu ON lu.snippet_label = sl.id + WHERE has_upvoted_filter AND ( + (filter_upvoted_by_me AND filter_upvoted_by_others) OR + (filter_upvoted_by_me AND NOT filter_upvoted_by_others AND lu.upvoted_by = current_user_id) OR + (filter_upvoted_by_others AND NOT filter_upvoted_by_me AND lu.upvoted_by != current_user_id) + ) + ), + state_filtered_audio_ids AS ( + SELECT id FROM audio_files + WHERE has_state_filter + AND location_state IN (SELECT jsonb_array_elements_text(p_filter->'states')) + ), + source_filtered_audio_ids AS ( + SELECT id FROM audio_files + WHERE has_source_filter + AND radio_station_code IN (SELECT jsonb_array_elements_text(p_filter->'sources')) + ), + -- Lightweight filtered IDs (for count + pagination, no heavy columns) + filtered_snippets AS ( + SELECT + s.id, + s.recorded_at, + s.user_last_activity, + s.upvote_count, + s.comment_count, + COALESCE(s.like_count, 0) AS like_count + FROM snippets s + LEFT JOIN user_hide_snippets uhs ON uhs.snippet = s.id + LEFT JOIN starred_snippet_ids ssi ON ssi.snippet = s.id + LEFT JOIN labeled_snippet_ids lsi ON lsi.snippet = s.id + LEFT JOIN upvoted_snippet_ids usi ON usi.snippet = s.id + LEFT JOIN state_filtered_audio_ids sfa ON sfa.id = s.audio_file + LEFT JOIN source_filtered_audio_ids srfa ON srfa.id = s.audio_file + WHERE s.status = 'Processed' AND (s.confidence_scores->>'overall')::INTEGER >= 95 + AND (user_is_admin OR uhs.snippet IS NULL) + AND (NOT has_starred_filter OR ssi.snippet IS NOT NULL) + AND (NOT has_labeled_filter OR lsi.snippet IS NOT NULL) + AND (NOT has_upvoted_filter OR usi.snippet IS NOT NULL) + AND (NOT has_state_filter OR sfa.id IS NOT NULL) + AND (NOT has_source_filter OR srfa.id IS NOT NULL) + AND ( + p_filter IS NULL OR + NOT p_filter ? 'languages' OR + jsonb_array_length(p_filter->'languages') = 0 OR + s.language ->> 'primary_language' IN (SELECT jsonb_array_elements_text(p_filter->'languages')) + ) + AND ( + p_filter IS NULL OR + NOT p_filter ? 'politicalSpectrum' OR + ( + CASE + WHEN p_filter->>'politicalSpectrum' = 'left' THEN (s.political_leaning->>'score')::FLOAT BETWEEN -1.0 AND -0.7 + WHEN p_filter->>'politicalSpectrum' = 'center-left' THEN (s.political_leaning->>'score')::FLOAT BETWEEN -0.7 AND -0.3 + WHEN p_filter->>'politicalSpectrum' = 'center' THEN (s.political_leaning->>'score')::FLOAT BETWEEN -0.3 AND 0.3 + WHEN p_filter->>'politicalSpectrum' = 'center-right' THEN (s.political_leaning->>'score')::FLOAT BETWEEN 0.3 AND 0.7 + WHEN p_filter->>'politicalSpectrum' = 'right' THEN (s.political_leaning->>'score')::FLOAT BETWEEN 0.7 AND 1.0 + ELSE TRUE + END + ) + ) + AND ( + p_filter IS NULL OR + NOT p_filter ? 'labels' OR + jsonb_array_length(p_filter->'labels') = 0 OR + EXISTS ( + SELECT 1 + FROM snippet_labels sl + WHERE sl.snippet = s.id + AND sl.label IN ( + SELECT (jsonb_array_elements_text(p_filter->'labels'))::UUID + ) + ) + ) + AND ( + trimmed_search_term = '' OR ( + (s.title ->> 'english') &@ trimmed_search_term + OR (s.title ->> 'spanish') &@ trimmed_search_term + OR (s.explanation ->> 'english') &@ trimmed_search_term + OR (s.explanation ->> 'spanish') &@ trimmed_search_term + OR (s.summary ->> 'english') &@ trimmed_search_term + OR (s.summary ->> 'spanish') &@ trimmed_search_term + OR s.transcription &@ trimmed_search_term + OR s.translation &@ trimmed_search_term + ) + ) + ), + total_count_cte AS ( + SELECT COUNT(*) AS cnt FROM filtered_snippets + ), + paginated_ids AS ( + SELECT id + FROM filtered_snippets fs + ORDER BY + CASE + WHEN p_order_by = 'upvotes' THEN fs.upvote_count + fs.like_count + WHEN p_order_by = 'comments' THEN fs.comment_count + WHEN p_order_by = 'activities' THEN + CASE WHEN fs.user_last_activity IS NULL THEN 0 ELSE EXTRACT(EPOCH FROM fs.user_last_activity) END + END DESC, + fs.recorded_at DESC + LIMIT page_size + OFFSET page * page_size + ), + label_summary AS ( + SELECT + l.id, + CASE WHEN p_language = 'spanish' THEN l.text_spanish ELSE l.text END AS text, + sl.upvote_count, + lu.id IS NOT NULL AS upvoted_by_me, + sl.snippet AS snippet_id + FROM snippet_labels sl + JOIN labels l ON l.id = sl.label + LEFT JOIN label_upvotes lu ON lu.snippet_label = sl.id AND lu.upvoted_by = current_user_id + WHERE sl.snippet IN (SELECT id FROM paginated_ids) + ), + paginated_snippets AS ( + SELECT + s.id, + s.recorded_at, + s.user_last_activity, + s.duration, + s.start_time, + s.end_time, + s.file_path, + s.file_size, + s.political_leaning, + CASE + WHEN p_language = 'spanish' THEN s.title ->> 'spanish' + ELSE s.title ->> 'english' + END AS title, + CASE + WHEN p_language = 'spanish' THEN s.summary ->> 'spanish' + ELSE s.summary ->> 'english' + END AS summary, + CASE + WHEN p_language = 'spanish' THEN s.explanation ->> 'spanish' + ELSE s.explanation ->> 'english' + END AS explanation, + s.confidence_scores, + s.language, + s.context, + s.upvote_count, + s.comment_count, + jsonb_build_object( + 'id', a.id, + 'radio_station_name', a.radio_station_name, + 'radio_station_code', a.radio_station_code, + 'location_state', a.location_state, + 'location_city', a.location_city + ) AS audio_file, + us.id IS NOT NULL AS starred_by_user, + ul.value AS user_like_status, + uhs.snippet IS NOT NULL AS hidden, + COALESCE(s.like_count, 0) AS like_count, + COALESCE(s.dislike_count, 0) AS dislike_count, + COALESCE(ld.labels, '[]'::jsonb) AS labels + FROM paginated_ids p + JOIN snippets s ON s.id = p.id + LEFT JOIN audio_files a ON s.audio_file = a.id + LEFT JOIN user_star_snippets us ON us.snippet = s.id AND us."user" = current_user_id + LEFT JOIN user_like_snippets ul ON ul.snippet = s.id AND ul."user" = current_user_id + LEFT JOIN user_hide_snippets uhs ON uhs.snippet = s.id + LEFT JOIN ( + SELECT + snippet_id, + jsonb_agg( + jsonb_build_object( + 'id', id, + 'text', text, + 'upvote_count', upvote_count, + 'upvoted_by_me', upvoted_by_me + ) + ) as labels + FROM label_summary + GROUP BY snippet_id + ) ld ON p.id = ld.snippet_id + ORDER BY + CASE + WHEN p_order_by = 'upvotes' THEN s.upvote_count + COALESCE(s.like_count, 0) + WHEN p_order_by = 'comments' THEN s.comment_count + WHEN p_order_by = 'activities' THEN + CASE + WHEN s.user_last_activity IS NULL THEN 0 + ELSE EXTRACT(EPOCH FROM s.user_last_activity) + END + END DESC, + s.recorded_at DESC + ) + SELECT + jsonb_agg( + jsonb_build_object( + 'id', ps.id, + 'recorded_at', ps.recorded_at, + 'user_last_activity', ps.user_last_activity, + 'duration', ps.duration, + 'start_time', ps.start_time, + 'end_time', ps.end_time, + 'file_path', ps.file_path, + 'file_size', ps.file_size, + 'political_leaning', ps.political_leaning, + 'title', ps.title, + 'summary', ps.summary, + 'explanation', ps.explanation, + 'confidence_scores', ps.confidence_scores, + 'language', ps.language, + 'context', ps.context, + 'labels', ps.labels, + 'audio_file', ps.audio_file, + 'starred_by_user', ps.starred_by_user, + 'user_like_status', ps.user_like_status, + 'hidden', ps.hidden, + 'like_count', ps.like_count, + 'dislike_count', ps.dislike_count + ) + ), + (SELECT cnt FROM total_count_cte) + INTO result, total_count + FROM paginated_snippets ps; + + total_pages := CEIL(total_count::FLOAT / page_size); + + RETURN jsonb_build_object( + 'num_of_snippets', total_count, + 'snippets', COALESCE(result, '[]'::jsonb), + 'current_page', page, + 'page_size', page_size, + 'total_pages', total_pages + ); +END; +$$ LANGUAGE plpgsql; diff --git a/supabase/migrations/20260129_optimize_recording_filter_and_indexes.sql b/supabase/migrations/20260129_optimize_recording_filter_and_indexes.sql new file mode 100644 index 0000000..6c1b933 --- /dev/null +++ b/supabase/migrations/20260129_optimize_recording_filter_and_indexes.sql @@ -0,0 +1,27 @@ +-- Drop unused get_recording_filter_options function +-- It was a legacy version of get_filtering_options, with 0 callers in the codebase +-- and no calls in Supabase logs for 7+ days. The frontend uses get_filtering_options instead. +DROP FUNCTION IF EXISTS get_recording_filter_options(); + +-- Add indexes for unindexed foreign keys (identified by Supabase Database Advisor) +-- These improve JOIN performance when querying related tables + +-- Index on comment_reactions.comment_id for faster comment reaction lookups +CREATE INDEX IF NOT EXISTS idx_comment_reactions_comment_id +ON public.comment_reactions(comment_id); + +-- Index on comments.room_id for faster room-based queries (FK: comments_duplicate_room_id_fkey) +CREATE INDEX IF NOT EXISTS idx_comments_room_id +ON public.comments(room_id); + +-- Index on snippets.stage_1_llm_response for faster joins to stage_1_llm_responses +CREATE INDEX IF NOT EXISTS idx_snippets_stage_1_llm_response +ON public.snippets(stage_1_llm_response); + +-- Index on user_roles.role for faster role lookups +CREATE INDEX IF NOT EXISTS idx_user_roles_role +ON public.user_roles(role); + +-- Note: Duplicate indexes (audio_files_id_key, comments_duplicate_comment_id_key) were identified +-- but cannot be safely removed as they have FK dependencies. These waste ~17MB storage +-- but don't impact query performance.