Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 110 additions & 56 deletions supabase/database/sql/get_snippets_function.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,33 +29,16 @@ BEGIN

user_is_admin := COALESCE('admin' = ANY(user_roles), FALSE);

CREATE TEMP TABLE filtered_snippets AS (
-- Pre-compute all label data with upvote counts
WITH label_data AS (
SELECT
sl.snippet,
COALESCE(jsonb_agg(
jsonb_build_object(
'id', l.id,
'text', CASE
WHEN p_language = 'spanish' THEN l.text_spanish
ELSE l.text
END,
'upvote_count', COALESCE(upvote_counts.count, 0),
'upvoted_by_me', COALESCE(upvote_counts.upvoted_by_current_user, false)
)
), '[]'::jsonb) AS labels
FROM public.snippet_labels sl
JOIN public.labels l ON sl.label = l.id
LEFT JOIN LATERAL (
SELECT
COUNT(*) AS count,
BOOL_OR(lu.upvoted_by = current_user_id) AS upvoted_by_current_user
FROM public.label_upvotes lu
WHERE lu.snippet_label = sl.id
) upvote_counts ON TRUE
GROUP BY sl.snippet
)
WITH
like_summary AS (
SELECT
snippet,
COUNT(*) FILTER (WHERE value = 1) AS likes,
COUNT(*) FILTER (WHERE value = -1) AS dislikes
FROM user_like_snippets
GROUP BY snippet
),
filtered_snippets AS (
SELECT
s.id,
s.recorded_at,
Expand All @@ -81,7 +64,8 @@ BEGIN
s.confidence_scores,
s.language,
s.context,
COALESCE(ld.labels, '[]'::jsonb) AS labels,
s.upvote_count,
s.comment_count,
jsonb_build_object(
'id', a.id,
'radio_station_name', a.radio_station_name,
Expand All @@ -92,21 +76,14 @@ BEGIN
us.id IS NOT NULL AS starred_by_user,
ul.value AS user_like_status,
uhs.snippet IS NOT NULL AS hidden,
like_counts.likes AS like_count,
like_counts.dislikes AS dislike_count
COALESCE(lk.likes, 0) AS like_count,
COALESCE(lk.dislikes, 0) AS dislike_count
FROM snippets s
LEFT JOIN audio_files a ON s.audio_file = a.id
LEFT JOIN label_data ld ON ld.snippet = s.id
LEFT JOIN like_summary lk ON lk.snippet = s.id
LEFT JOIN user_star_snippets us ON us.snippet = s.id AND us."user" = current_user_id
LEFT JOIN user_like_snippets ul ON ul.snippet = s.id AND ul."user" = current_user_id
LEFT JOIN user_hide_snippets uhs ON uhs.snippet = s.id
CROSS JOIN LATERAL (
SELECT
COUNT(*) FILTER (WHERE value = 1) AS likes,
COUNT(*) FILTER (WHERE value = -1) AS dislikes
FROM user_like_snippets uls
WHERE uls.snippet = s.id
) like_counts
WHERE s.status = 'Processed' AND (s.confidence_scores->>'overall')::INTEGER >= 95
AND (
-- If user is admin, show all snippets (including hidden ones)
Expand Down Expand Up @@ -288,30 +265,107 @@ BEGIN
OR s.translation &@ trimmed_search_term
)
)
ORDER BY
),
paginated_snippets AS (
SELECT fs.*, COUNT(*) OVER() AS num_of_snippets
FROM filtered_snippets fs
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ORDER BY logic is duplicated in both paginated_snippets and paginated_snippets_with_labels CTEs. Consolidating the ordering logic into one place would reduce maintenance overhead and potential inconsistencies.

ORDER BY
CASE
WHEN p_order_by = 'upvotes' THEN s.upvote_count + s.like_count
WHEN p_order_by = 'comments' THEN s.comment_count
WHEN p_order_by = 'activities' THEN
WHEN p_order_by = 'upvotes' THEN fs.upvote_count + fs.like_count
WHEN p_order_by = 'comments' THEN fs.comment_count
WHEN p_order_by = 'activities' THEN
CASE
WHEN s.user_last_activity IS NULL THEN 0
ELSE EXTRACT(EPOCH FROM s.user_last_activity)
WHEN fs.user_last_activity IS NULL THEN 0
ELSE EXTRACT(EPOCH FROM fs.user_last_activity)
END
END DESC,
s.recorded_at DESC -- Default for all other cases, including p_order_by = 'latest'
);

SELECT COUNT(*) INTO total_count
FROM filtered_snippets;

SELECT jsonb_agg(fs.*) INTO result
FROM (
SELECT * FROM filtered_snippets
fs.recorded_at DESC -- Default for all other cases, including p_order_by = 'latest'
LIMIT page_size
OFFSET page * page_size
) fs;

DROP TABLE filtered_snippets;
),
label_summary AS (
SELECT
l.id,
CASE
WHEN p_language = 'spanish' THEN l.text_spanish
ELSE l.text
END AS text,
COALESCE(lu.upvote_count, 0) AS upvote_count,
COALESCE(lu.upvoted_by_me, FALSE) AS upvoted_by_me,
sl.snippet AS snippet_id
FROM snippet_labels sl
JOIN labels l ON l.id = sl.label
LEFT JOIN (
SELECT
snippet_label,
COUNT(*) AS upvote_count,
BOOL_OR(upvoted_by = current_user_id) AS upvoted_by_me
FROM label_upvotes lu
GROUP BY snippet_label
) lu ON lu.snippet_label = sl.id
Comment on lines +298 to +305
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The current implementation of the lu subquery calculates aggregates over the entire label_upvotes table for every function call, which can be inefficient if the table grows large. A more performant approach is to use a LATERAL join. This will calculate upvote counts only for the labels belonging to the snippets on the current page, significantly reducing the amount of data processed.

        LEFT JOIN LATERAL (
            SELECT
                COUNT(*) AS upvote_count,
                BOOL_OR(upvoted_by = current_user_id) AS upvoted_by_me
            FROM public.label_upvotes lu
            WHERE lu.snippet_label = sl.id
        ) lu ON TRUE

WHERE sl.snippet IN (SELECT id FROM paginated_snippets)
),
paginated_snippets_with_labels AS (
SELECT
ps.*,
COALESCE(ld.labels, '[]'::jsonb) AS labels
FROM paginated_snippets ps
LEFT JOIN (
SELECT
snippet_id,
jsonb_agg(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the order of labels within each snippet is important, consider adding an explicit ORDER BY inside the jsonb_agg call. Without it, the aggregated JSON array order may be unpredictable.

jsonb_build_object(
'id', id,
'text', text,
'upvote_count', upvote_count,
'upvoted_by_me', upvoted_by_me
)
) as labels
FROM label_summary
GROUP BY snippet_id
) ld ON ps.id = ld.snippet_id
ORDER BY
CASE
WHEN p_order_by = 'upvotes' THEN ps.upvote_count + ps.like_count
WHEN p_order_by = 'comments' THEN ps.comment_count
WHEN p_order_by = 'activities' THEN
CASE
WHEN ps.user_last_activity IS NULL THEN 0
ELSE EXTRACT(EPOCH FROM ps.user_last_activity)
END
END DESC,
ps.recorded_at DESC -- Default for all other cases, including p_order_by = 'latest'
Comment on lines +327 to +337
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

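This ORDER BY clause is a duplicate of the one in the paginated_snippets CTE (lines 272-282). The ordering in paginated_snippets has to stay, because LIMIT/OFFSET depend on it, but an ORDER BY on this CTE does not guarantee the order in which jsonb_agg receives its rows. To make the output order explicit and keep the sorting logic next to the place that relies on it, you can remove this ORDER BY clause and apply the ordering directly within the jsonb_agg function in the final SELECT statement.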

For example:

SELECT
    jsonb_agg(
        jsonb_build_object(...) 
        ORDER BY
            CASE ... END DESC,
            ps.recorded_at DESC
    ),
    MAX(ps.num_of_snippets)
INTO result, total_count
FROM paginated_snippets_with_labels ps;

)
SELECT
jsonb_agg(
jsonb_build_object(
'id', ps.id,
'recorded_at', ps.recorded_at,
'user_last_activity', ps.user_last_activity,
'duration', ps.duration,
'start_time', ps.start_time,
'end_time', ps.end_time,
'file_path', ps.file_path,
'file_size', ps.file_size,
'political_leaning', ps.political_leaning,
'title', ps.title,
'summary', ps.summary,
'explanation', ps.explanation,
'confidence_scores', ps.confidence_scores,
'language', ps.language,
'context', ps.context,
'labels', ps.labels,
'audio_file', ps.audio_file,
'starred_by_user', ps.starred_by_user,
'user_like_status', ps.user_like_status,
'hidden', ps.hidden,
'like_count', ps.like_count,
'dislike_count', ps.dislike_count
)
Comment on lines +341 to +364
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Instead of manually constructing the JSON object by listing every column, you can use to_jsonb(ps) - 'num_of_snippets' to build it more concisely and robustly. This approach automatically includes all columns from the paginated_snippets_with_labels CTE (except for the internal num_of_snippets column), making the code easier to maintain. If you add or remove columns from the filtered_snippets CTE in the future, they will be automatically reflected in the output without needing to modify this part of the query. Note that the output is not strictly identical to the explicit list: columns that filtered_snippets selects but the explicit object omits (for example upvote_count and comment_count) would also appear, so confirm that consumers accept the extra keys or subtract them as well.

            to_jsonb(ps) - 'num_of_snippets'

),
MAX(ps.num_of_snippets)
INTO result, total_count
Comment on lines +366 to +367
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Coalesce total_count so empty pages don’t return NULL pagination metadata

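When paginated_snippets returns no rows (either because the filtered set is empty or because the requested page lies past the last row), MAX(ps.num_of_snippets) yields NULL, so total_count stays NULL, CEIL(total_count::FLOAT / page_size) propagates NULL, and the response surfaces num_of_snippets/total_pages as null. That breaks downstream consumers expecting integers (e.g., page counts on the first empty page). Please coalesce the aggregate to 0 before assigning. Be aware that for an out-of-range page this reports 0 even though matching snippets exist; if that case matters, the total needs to be computed separately from the paginated rows.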

-        MAX(ps.num_of_snippets)
+        COALESCE(MAX(ps.num_of_snippets), 0)
🤖 Prompt for AI Agents
In supabase/database/sql/get_snippets_function.sql around lines 366 to 367, the
MAX(ps.num_of_snippets) aggregate can return NULL for empty result sets so
total_count becomes NULL and breaks pagination; change the assignment to
coalesce the aggregate to 0 (e.g., use COALESCE(MAX(ps.num_of_snippets), 0))
before INTO result, total_count so empty pages yield 0 for total_count and
subsequent CEIL/num_of_snippets calculations return integers.

FROM paginated_snippets_with_labels ps;

total_pages := CEIL(total_count::FLOAT / page_size);

Expand Down