-
Notifications
You must be signed in to change notification settings - Fork 8
Fix information content values for conflation #366
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
7780473
3a56fde
b8e3275
7eb9d9b
d6122e9
f95deef
57e866e
d3d9ad6
d3eb446
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -558,9 +558,6 @@ async def get_normalized_nodes( | |
|
|
||
| # did we get some canonical ids | ||
| if canonical_nonan: | ||
| # get the information content values | ||
| info_contents = await get_info_content(app, canonical_nonan) | ||
|
|
||
| # Get the equivalent_ids and types | ||
| eqids, types = await get_eqids_and_types(app, canonical_nonan) | ||
|
|
||
|
|
@@ -597,6 +594,7 @@ async def get_normalized_nodes( | |
| for canon, oids in zip(itertools.cycle(canonical_nonan), other_ids): | ||
| dereference_others[canon].extend(oids) | ||
|
|
||
| # sum(other_ids, []) is basically other_ids.flatten(). | ||
| all_other_ids = sum(other_ids, []) | ||
| eqids2, types2 = await get_eqids_and_types(app, all_other_ids) | ||
|
|
||
|
|
@@ -612,32 +610,50 @@ async def get_normalized_nodes( | |
|
|
||
| zipped = zip(canonical_nonan, eqids, types) | ||
|
|
||
| # Look up all the information content values. | ||
| info_contents_all = await get_info_content(app, all_other_ids) | ||
| info_contents = {} | ||
|
|
||
| # Apparently sometimes we can get to the final info_contents calculation without going through | ||
| # resetting the ic_vals. | ||
| ic_vals = None | ||
|
|
||
| for canonical_id, e, t in zipped: | ||
| # here's where we replace the eqids, types | ||
| if len(dereference_others[canonical_id]) > 0: | ||
| e = [] | ||
| t = [] | ||
| ic_vals = [] | ||
|
|
||
| for other in dereference_others[canonical_id]: | ||
| # logging.debug(f"e = {e}, other = {other}, deref_others_eqs = {deref_others_eqs}") | ||
| e += deref_others_eqs[other] | ||
| t += deref_others_typ[other] | ||
| if other in info_contents_all and info_contents_all[other]: | ||
| ic_vals.append(info_contents_all[other]) | ||
|
|
||
| final_eqids.append(e) | ||
| final_types.append(uniquify_list(t)) | ||
|
|
||
| # What's the smallest IC value for this canonical ID? | ||
| info_contents[canonical_id] = min(ic_vals) if ic_vals else None | ||
|
|
||
| dereference_ids = dict(zip(canonical_nonan, final_eqids)) | ||
| dereference_types = dict(zip(canonical_nonan, final_types)) | ||
|
|
||
| else: | ||
| dereference_ids = dict(zip(canonical_nonan, eqids)) | ||
| dereference_types = dict(zip(canonical_nonan, types)) | ||
|
|
||
| # get the information content values | ||
| info_contents = await get_info_content(app, canonical_nonan) | ||
| else: | ||
| dereference_ids = dict() | ||
| dereference_types = dict() | ||
|
|
||
| # output the final result | ||
| normal_nodes = { | ||
| input_curie: await create_node(app, canonical_id, dereference_ids, dereference_types, info_contents, | ||
| input_curie: await create_node(app, canonical_id, dereference_ids, dereference_types, info_contents, info_contents_all, | ||
|
||
| include_descriptions=include_descriptions, | ||
| include_individual_types=include_individual_types, | ||
| include_taxa=include_taxa, | ||
|
|
@@ -680,7 +696,7 @@ async def get_info_content_attribute(app, canonical_nonan) -> dict: | |
| return new_attrib | ||
|
|
||
|
|
||
| async def create_node(app, canonical_id, equivalent_ids, types, info_contents, include_descriptions=True, | ||
| async def create_node(app, canonical_id, equivalent_ids, types, info_contents, info_contents_all, include_descriptions=True, | ||
| include_individual_types=False, include_taxa=False, conflations=None): | ||
| """Construct the output format given the compressed redis data""" | ||
| # It's possible that we didn't find a canonical_id | ||
|
|
@@ -821,6 +837,10 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, i | |
| eq_item["type"] = eqid['types'][-1] | ||
| node["equivalent_identifiers"].append(eq_item) | ||
|
|
||
| # TODO: figure out if this slows us down significantly. | ||
| if eqid['i'] in info_contents_all: | ||
| eq_item["information_content"] = info_contents_all[eqid['i']] | ||
|
|
||
| if include_descriptions and descriptions: | ||
| node["descriptions"] = descriptions | ||
| node["id"]["description"] = descriptions[0] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The variable `ic_vals` is only initialized inside the conditional block at lines 619-622, but it is referenced at line 635, which is outside that block. If `len(dereference_others[canonical_id])` is 0 for any canonical_id in the loop, `ic_vals` will be undefined when accessed at line 635, causing a NameError. The initialization of `ic_vals` should be moved outside the conditional block to line 617 or 618.