aboutsummaryrefslogtreecommitdiff
path: root/backend/data/genOtolData.py
diff options
context:
space:
mode:
authorTerry Truong <terry06890@gmail.com>2022-05-28 11:30:32 +1000
committerTerry Truong <terry06890@gmail.com>2022-05-28 11:30:32 +1000
commit9f0dcdea1049a59cd1fb4f0872edee1b7a87a4e6 (patch)
tree3bb51b1bf2fd4ce1f25522b60b1dfbb17d8a4811 /backend/data/genOtolData.py
parent14f20c9e4e27bf0e57b06c7251e17bef9ad10c67 (diff)
Fix tree-trimming to not discard certain nodes
Previously, trimming discarded nodes without an image/desc/reduced_tree_presence, including those with a descendant that has them. A linked-image association was being used to prevent this, but it wasn't reliable.
Diffstat (limited to 'backend/data/genOtolData.py')
-rwxr-xr-xbackend/data/genOtolData.py19
1 files changed, 17 insertions, 2 deletions
diff --git a/backend/data/genOtolData.py b/backend/data/genOtolData.py
index 252e9f2..87db2c4 100755
--- a/backend/data/genOtolData.py
+++ b/backend/data/genOtolData.py
@@ -141,14 +141,29 @@ def parseNewickName():
return [match.group(1).replace("_", " "), match.group(2)]
rootId = parseNewick()
# For nodes with *many* children, remove some of those children
-print("Trimming nodes from tree")
+print("Getting nodes for which to avoid trimming")
namesToKeep = set()
if os.path.exists(keptNamesFile):
- with open(keptNamesFile) as file: # Contains names with an image (incl linked), desc, or reduced-tree-presence
+ with open(keptNamesFile) as file: # Contains names with an image, desc, or reduced-tree-presence
for line in file:
namesToKeep.add(line.rstrip())
else:
print("WARNING: No '{}' file found".format(keptNamesFile))
+print("Read in {} nodes".format(len(namesToKeep)))
+keptAncestors = set()
+for name in namesToKeep:
+ if name in nameToFirstId:
+ ids = [nameToFirstId[name]] if name not in dupNameToIds else dupNameToIds[name]
+ for id in ids:
+ parentId = nodeMap[id]["parent"]
+ while parentId != None:
+ parentObj = nodeMap[parentId]
+ keptAncestors.add(parentObj["name"])
+ parentId = parentObj["parent"]
+oldNamesToKeepSz = len(namesToKeep)
+namesToKeep.update(keptAncestors)
+print("Added {} ancestor nodes".format(len(namesToKeep) - oldNamesToKeepSz))
+print("Trimming nodes from tree")
def trimChildren(nodeId):
""" Traverse node tree, looking for nodes with too many children """
nodeObj = nodeMap[nodeId]