diff --git a/documentation/API.md b/documentation/API.md index 5231d69..74cd2ec 100644 --- a/documentation/API.md +++ b/documentation/API.md @@ -51,6 +51,9 @@ Example output: }, [...] ], + "descriptions": [ + "An oxygen hydride consisting of an oxygen atom that is covalently bonded to two hydrogen atoms" + ], "type": [ "biolink:SmallMolecule", "biolink:MolecularEntity", @@ -82,6 +85,8 @@ Example output: Each identifier includes an `identifier` (a CURIE), a `label` (which corresponds to the label of the CURIE as per its authoritative source), a `description` (currently only taken from UberGraph), and (if `individual_types` is set) the Biolink type of each identifier. This list is ordered in the Biolink Model's preferred prefix order for this class. + * `descriptions`: a list of the unique descriptions of the identifiers within this clique. Descriptions appear in the + order in which they first occur in `equivalent_identifiers`. * `type`: The list of Biolink classes for this clique, starting with the most specific type (in this example, `biolink:SmallMolecule`), and ending with any mixins that include this class. * `information_content`: the information content value between 0 and 100. This is calculated by retrieving the diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py index d346996..32d9126 100644 --- a/node_normalizer/normalizer.py +++ b/node_normalizer/normalizer.py @@ -799,18 +799,19 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, i # or filtered_possible_labels after this point. # now need to reformat the identifier keys. It could be cleaner but we have to worry about if there is a label - first_description = None + descriptions = [] node_taxa = set() node["equivalent_identifiers"] = [] for eqid in eids: eq_item = {"identifier": eqid["i"]} if "l" in eqid and eqid["l"]: eq_item["label"] = eqid["l"] - # if descriptions is enabled and exist add them to each eq_id entry + # if descriptions are enabled and present, add the first one to this eq_id entry and to the list of unique descriptions.
if include_descriptions and "d" in eqid and len(eqid["d"]) > 0: - eq_item["description"] = eqid["d"][0] - if not first_description: - first_description = eq_item["description"] + desc = eqid["d"][0] + eq_item["description"] = desc + if desc not in descriptions: + descriptions.append(desc) # if include_taxa is enabled and we have taxa on this node, add them to every eq_id entry if include_taxa and "t" in eqid and eqid["t"]: eq_item["taxa"] = eqid["t"] @@ -820,8 +821,9 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, i eq_item["type"] = eqid['types'][-1] node["equivalent_identifiers"].append(eq_item) - if include_descriptions and first_description: - node["description"] = first_description + if include_descriptions and descriptions: + node["descriptions"] = descriptions + node["id"]["description"] = descriptions[0] if include_taxa and node_taxa: node["taxa"] = sorted(node_taxa, key=get_numerical_curie_suffix)