Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions node_normalizer/normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,9 +558,6 @@ async def get_normalized_nodes(

# did we get some canonical ids
if canonical_nonan:
# get the information content values
info_contents = await get_info_content(app, canonical_nonan)

# Get the equivalent_ids and types
eqids, types = await get_eqids_and_types(app, canonical_nonan)

Expand Down Expand Up @@ -597,6 +594,7 @@ async def get_normalized_nodes(
for canon, oids in zip(itertools.cycle(canonical_nonan), other_ids):
dereference_others[canon].extend(oids)

# sum(other_ids, []) is basically other_ids.flatten().
all_other_ids = sum(other_ids, [])
eqids2, types2 = await get_eqids_and_types(app, all_other_ids)

Expand All @@ -612,32 +610,50 @@ async def get_normalized_nodes(

zipped = zip(canonical_nonan, eqids, types)

# Look up all the information content values.
info_contents_all = await get_info_content(app, all_other_ids)
info_contents = {}

# Apparently sometimes we can get to the final info_contents calculation without going through
# resetting the ic_vals.
ic_vals = None

for canonical_id, e, t in zipped:
# here's where we replace the eqids, types
if len(dereference_others[canonical_id]) > 0:
e = []
t = []
ic_vals = []

for other in dereference_others[canonical_id]:
# logging.debug(f"e = {e}, other = {other}, deref_others_eqs = {deref_others_eqs}")
e += deref_others_eqs[other]
t += deref_others_typ[other]
if other in info_contents_all and info_contents_all[other]:
ic_vals.append(info_contents_all[other])

final_eqids.append(e)
final_types.append(uniquify_list(t))

# What's the smallest IC value for this canonical ID?
info_contents[canonical_id] = min(ic_vals) if ic_vals else None
Comment on lines +626 to +639
Copy link

Copilot AI Feb 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable ic_vals is only initialized inside the conditional block at line 619-622, but it is referenced at line 635 which is outside that block. If len(dereference_others[canonical_id]) is 0 for any canonical_id in the loop, ic_vals will be undefined when accessed at line 635, causing a NameError. The initialization of ic_vals should be moved outside the conditional block to line 617 or 618.

Copilot uses AI. Check for mistakes.

dereference_ids = dict(zip(canonical_nonan, final_eqids))
dereference_types = dict(zip(canonical_nonan, final_types))

else:
dereference_ids = dict(zip(canonical_nonan, eqids))
dereference_types = dict(zip(canonical_nonan, types))

# get the information content values
info_contents = await get_info_content(app, canonical_nonan)
else:
dereference_ids = dict()
dereference_types = dict()

# output the final result
normal_nodes = {
input_curie: await create_node(app, canonical_id, dereference_ids, dereference_types, info_contents,
input_curie: await create_node(app, canonical_id, dereference_ids, dereference_types, info_contents, info_contents_all,
Copy link

Copilot AI Feb 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable info_contents_all is not defined in the non-conflation path (lines 640-645) or when canonical_nonan is empty (lines 646-648). This will cause a NameError when create_node is called in these cases. The variable needs to be initialized in all code paths before this call.

Copilot uses AI. Check for mistakes.
include_descriptions=include_descriptions,
include_individual_types=include_individual_types,
include_taxa=include_taxa,
Expand Down Expand Up @@ -680,7 +696,7 @@ async def get_info_content_attribute(app, canonical_nonan) -> dict:
return new_attrib


async def create_node(app, canonical_id, equivalent_ids, types, info_contents, include_descriptions=True,
async def create_node(app, canonical_id, equivalent_ids, types, info_contents, info_contents_all, include_descriptions=True,
include_individual_types=False, include_taxa=False, conflations=None):
"""Construct the output format given the compressed redis data"""
# It's possible that we didn't find a canonical_id
Expand Down Expand Up @@ -821,6 +837,10 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, i
eq_item["type"] = eqid['types'][-1]
node["equivalent_identifiers"].append(eq_item)

# TODO: figure out if this slows us down significantly.
if eqid['i'] in info_contents_all:
eq_item["information_content"] = info_contents_all[eqid['i']]

if include_descriptions and descriptions:
node["descriptions"] = descriptions
node["id"]["description"] = descriptions[0]
Expand Down