Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions node_normalizer/model/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ class CurieList(BaseModel):
title="Whether to return individual types for equivalent identifiers"
)

include_taxa: bool = Field(
True,
title="Whether to return taxa for equivalent identifiers"
)

class Config:
schema_extra = {
"example": {
Expand Down
48 changes: 29 additions & 19 deletions node_normalizer/normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@
from fastapi import FastAPI
from reasoner_pydantic import KnowledgeGraph, Message, QueryGraph, Result, CURIE, Attribute

from .util import LoggingUtil, uniquify_list, BIOLINK_NAMED_THING
from .util import (
LoggingUtil,
uniquify_list,
BIOLINK_NAMED_THING,
get_numerical_curie_suffix,
)

# logger = LoggingUtil.init_logging(__name__, level=logging.INFO, format='medium', logFilePath=os.path.dirname(__file__), logFileLevel=logging.INFO)
logger = LoggingUtil.init_logging()
Expand Down Expand Up @@ -514,7 +519,7 @@ async def get_eqids_and_types(

# Every equivalent identifier here has the same type.
for eqid in eqids[index]:
eqid.update({'t': [typ]})
eqid.update({'types': [typ]})

return eqids, types_with_ancestors

Expand All @@ -525,7 +530,8 @@ async def get_normalized_nodes(
conflate_gene_protein: bool,
conflate_chemical_drug: bool,
include_descriptions: bool = False,
include_individual_types: bool = True
include_individual_types: bool = True,
include_taxa: bool = True,
) -> Dict[str, Optional[str]]:
"""
Get value(s) for key(s) using redis MGET
Expand Down Expand Up @@ -634,6 +640,7 @@ async def get_normalized_nodes(
input_curie: await create_node(app, canonical_id, dereference_ids, dereference_types, info_contents,
include_descriptions=include_descriptions,
include_individual_types=include_individual_types,
include_taxa=include_taxa,
conflations={
'GeneProtein': conflate_gene_protein,
'DrugChemical': conflate_chemical_drug,
Expand Down Expand Up @@ -674,7 +681,7 @@ async def get_info_content_attribute(app, canonical_nonan) -> dict:


async def create_node(app, canonical_id, equivalent_ids, types, info_contents, include_descriptions=True,
include_individual_types=False, conflations=None):
include_individual_types=False, include_taxa=False, conflations=None):
"""Construct the output format given the compressed redis data"""
# It's possible that we didn't find a canonical_id
if canonical_id is None:
Expand Down Expand Up @@ -791,31 +798,34 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, i
# Now that we've determined a label for this clique, we should never use identifiers_with_labels, possible_labels,
# or filtered_possible_labels after this point.

# if descriptions are enabled look for the first available description and use that
if include_descriptions:
descriptions = list(
map(
lambda x: x[0],
filter(lambda x: len(x) > 0, [eid['d'] for eid in eids if 'd' in eid])
)
)
if len(descriptions) > 0:
node["id"]["description"] = descriptions[0]

# Now we need to reformat the identifier keys. It could be cleaner, but we have to account for whether there is a label.
first_description = None
node_taxa = set()
node["equivalent_identifiers"] = []
for eqid in eids:
eq_item = {"identifier": eqid["i"]}
if "l" in eqid:
if "l" in eqid and eqid["l"]:
eq_item["label"] = eqid["l"]
# if descriptions are enabled and exist, add them to each eq_id entry
if include_descriptions and "d" in eqid and len(eqid["d"]):
if include_descriptions and "d" in eqid and len(eqid["d"]) > 0:
eq_item["description"] = eqid["d"][0]
if not first_description:
first_description = eq_item["description"]
# if include_taxa is enabled and we have taxa on this node, add them to every eq_id entry
if include_taxa and "t" in eqid and eqid["t"]:
eq_item["taxa"] = eqid["t"]
node_taxa.update(eqid["t"])
# if individual types have been requested, add them too.
if include_individual_types and 't' in eqid:
eq_item["type"] = eqid['t'][-1]
if include_individual_types and 'types' in eqid:
eq_item["type"] = eqid['types'][-1]
node["equivalent_identifiers"].append(eq_item)

if include_descriptions and first_description:
node["description"] = first_description

if include_taxa and node_taxa:
node["taxa"] = sorted(node_taxa, key=get_numerical_curie_suffix)

# We need to remove `biolink:Entity` from the types returned.
# (See explanation at https://github.com/TranslatorSRI/NodeNormalization/issues/173)
if 'biolink:Entity' in types[canonical_id]:
Expand Down
11 changes: 8 additions & 3 deletions node_normalizer/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,15 +263,18 @@ async def get_normalized_node_handler(
conflate: bool = fastapi.Query(True, description="Whether to apply gene/protein conflation"),
drug_chemical_conflate: bool = fastapi.Query(False, description="Whether to apply drug/chemical conflation"),
description: bool = fastapi.Query(False, description="Whether to return curie descriptions when possible"),
individual_types: bool = fastapi.Query(False, description="Whether to return individual types for equivalent identifiers")
individual_types: bool = fastapi.Query(False, description="Whether to return individual types for equivalent identifiers"),
include_taxa: bool = fastapi.Query(True, description="Whether to return taxa for equivalent identifiers"),
):
"""
Get value(s) for key(s) using redis MGET
"""
# no_conflate = request.args.get('dontconflate',['GeneProtein'])
normalized_nodes = await get_normalized_nodes(app, curie, conflate, drug_chemical_conflate,
include_descriptions=description,
include_individual_types=individual_types)
include_individual_types=individual_types,
include_taxa=include_taxa,
)

# If curie contains at least one entry, then the only way normalized_nodes could be blank
# would be if an error occurred during processing.
Expand All @@ -291,7 +294,9 @@ async def get_normalized_node_handler_post(curies: CurieList):
Get value(s) for key(s) using redis MGET
"""
normalized_nodes = await get_normalized_nodes(app, curies.curies, curies.conflate, curies.drug_chemical_conflate,
curies.description, include_individual_types=curies.individual_types)
curies.description, include_individual_types=curies.individual_types,
include_taxa=curies.include_taxa,
)

# If curies.curies contains at least one entry, then the only way normalized_nodes could be blank
# would be if an error occurred during processing.
Expand Down
2 changes: 1 addition & 1 deletion node_normalizer/set_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ async def generate_setid(app, curies, conflations) -> SetIDResponse:

# We use get_normalized_nodes() to normalize all the CURIEs for us.
normalization_results = await get_normalized_nodes(
app, curies, gene_protein_conflation, drug_chemical_conflation, include_descriptions=False
app, curies, gene_protein_conflation, drug_chemical_conflation, include_descriptions=False, include_individual_types=False, include_taxa=False
)

# We prepare a set of sorted, deduplicated curies.
Expand Down
14 changes: 14 additions & 0 deletions node_normalizer/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,20 @@
# Some constants.
BIOLINK_NAMED_THING = "biolink:NamedThing"

def get_numerical_curie_suffix(curie):
    """
    If a CURIE has a numerical suffix, return it as an integer. Otherwise return None.

    The suffix is everything after the first ":" in the CURIE. A CURIE without
    any ":" has no suffix, so None is returned for it rather than raising.

    Note: None cannot be ordered against integers in Python 3, so a caller using
    this function as a sort key should make sure every CURIE being sorted has a
    numerical suffix (or handle the None case itself).

    :param curie: A CURIE.
    :return: An integer if the CURIE suffix is castable to int, otherwise None.
    """
    # partition() always returns a 3-tuple; `separator` is empty when no ":" was found.
    _prefix, separator, suffix = curie.partition(":")
    if not separator:
        return None

    # Try to cast the CURIE suffix to an integer. A non-numeric suffix is expected
    # for some CURIEs, so a ValueError simply means "no numerical suffix".
    try:
        return int(suffix)
    except ValueError:
        return None

# loggers = {}
class LoggingUtil(object):
Expand Down