From 4deebf0fe4274a8f1bac1f33c1fdb05f4d23cf9e Mon Sep 17 00:00:00 2001
From: Ben Capodanno
Date: Fri, 14 Mar 2025 03:11:21 -0700
Subject: [PATCH] Add Additional Statistics Routes

Adds an additional two statistics routes for aggregate mapped variant
data based on score sets. These routes make use of the new materialized
view for published variants.
---
 src/mavedb/routers/score_sets.py | 36 ++++++++++++++++++++++++++++++++++++
 src/mavedb/routers/statistics.py | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py
index 235f436f..65e83c0f 100644
--- a/src/mavedb/routers/score_sets.py
+++ b/src/mavedb/routers/score_sets.py
@@ -138,6 +138,42 @@ def search_score_sets(
     return fetch_superseding_score_set_in_search_result(score_sets, user_data, search)
 
 
+@router.get("/score-sets/mapped-genes", status_code=200, response_model=dict[str, list[str]])
+def score_set_mapped_gene_mapping(
+    db: Session = Depends(deps.get_db), user_data: UserData = Depends(get_current_user)
+) -> Any:
+    """
+    Get a mapping of score set URNs to mapped gene symbols.
+
+    Only target genes with post-mapped metadata are considered, and only score sets the requesting user may read
+    are included. Symbols are collected from the `sequence_genes` lists of the `genomic` and `protein` entries of
+    each target gene's post-mapped metadata; each symbol is listed once per score set, in first-seen order. Score
+    sets with no gene symbols are omitted.
+    """
+    save_to_logging_context({"requested_resource": "mapped-genes"})
+
+    score_sets_with_mapping_metadata = db.execute(
+        select(ScoreSet, TargetGene.post_mapped_metadata)
+        .join(ScoreSet)
+        .where(TargetGene.post_mapped_metadata.is_not(None))
+    ).all()
+
+    # A dict keyed by symbol serves as an insertion-ordered set, so a symbol reported for both the genomic and
+    # protein layers (or by several target genes of one score set) appears only once.
+    mapped_genes: dict[str, dict[str, None]] = {}
+    for score_set_item, post_mapped_metadata in score_sets_with_mapping_metadata:
+        if not has_permission(user_data, score_set_item, Action.READ).permitted:
+            continue
+
+        for layer in ("genomic", "protein"):
+            # `or` guards against a layer or gene list stored as an explicit null rather than being absent.
+            layer_metadata = post_mapped_metadata.get(layer) or {}
+            for gene in layer_metadata.get("sequence_genes") or []:
+                mapped_genes.setdefault(score_set_item.urn, {})[gene] = None
+
+    return {urn: list(genes) for urn, genes in mapped_genes.items()}
+
+
 @router.post(
     "/me/score-sets/search",
     status_code=200,
diff --git a/src/mavedb/routers/statistics.py b/src/mavedb/routers/statistics.py
index f79d07f6..876eac14 100644
--- a/src/mavedb/routers/statistics.py
+++ b/src/mavedb/routers/statistics.py
@@ -277,6 +277,44 @@ def record_counts(model: RecordNames, group: Optional[GroupBy] = None, db: Sessi
     return OrderedDict(sorted(grouped.items()))
 
 
+@router.get("/record/score-set/variant/count", status_code=200, response_model=dict[str, int])
+def record_variant_counts(db: Session = Depends(get_db)) -> dict[str, int]:
+    """
+    Returns a dictionary mapping each score set URN in the published variants materialized view to the count of
+    variant IDs the view holds for that score set. Score sets with a count of zero are omitted, and keys are
+    sorted by URN.
+    """
+    # NOTE(review): this is a plain COUNT rather than COUNT(DISTINCT), so it counts distinct variants only if the
+    # view holds at most one row per variant -- confirm against the view definition.
+    variants = db.execute(
+        select(PublishedVariantsMV.score_set_urn, func.count(PublishedVariantsMV.variant_id))
+        .group_by(PublishedVariantsMV.score_set_urn)
+        .order_by(PublishedVariantsMV.score_set_urn)
+    ).all()
+
+    # GROUP BY already yields exactly one row per URN, so the rows need no further aggregation in Python.
+    return OrderedDict(sorted((urn, count) for urn, count in variants if count > 0))
+
+
+@router.get("/record/score-set/mapped-variant/count", status_code=200, response_model=dict[str, int])
+def record_mapped_variant_counts(db: Session = Depends(get_db)) -> dict[str, int]:
+    """
+    Returns a dictionary mapping each score set URN in the published variants materialized view to the count of
+    mapped variant IDs the view holds for that score set. Score sets with a count of zero are omitted, and keys
+    are sorted by URN.
+    """
+    # NOTE(review): this is a plain COUNT rather than COUNT(DISTINCT), so it counts distinct mapped variants only
+    # if the view holds at most one row per mapped variant -- confirm against the view definition.
+    variants = db.execute(
+        select(PublishedVariantsMV.score_set_urn, func.count(PublishedVariantsMV.mapped_variant_id))
+        .group_by(PublishedVariantsMV.score_set_urn)
+        .order_by(PublishedVariantsMV.score_set_urn)
+    ).all()
+
+    # GROUP BY already yields exactly one row per URN, so the rows need no further aggregation in Python.
+    return OrderedDict(sorted((urn, count) for urn, count in variants if count > 0))
+
+
 ########################################################################################
 # Target statistics
 ########################################################################################