From 9f0dcdea1049a59cd1fb4f0872edee1b7a87a4e6 Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Sat, 28 May 2022 11:30:32 +1000 Subject: Fix tree-trimming to not discard certain nodes Was trimming off nodes without an image/desc/reduced_tree_presence, including those with a descendant that has them. Was using a linked-image association to prevent this, but this wasn't reliable. --- backend/data/genOtolData.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'backend/data/genOtolData.py') diff --git a/backend/data/genOtolData.py b/backend/data/genOtolData.py index 252e9f2..87db2c4 100755 --- a/backend/data/genOtolData.py +++ b/backend/data/genOtolData.py @@ -141,14 +141,29 @@ def parseNewickName(): return [match.group(1).replace("_", " "), match.group(2)] rootId = parseNewick() # For nodes with *many* children, remove some of those children -print("Trimming nodes from tree") +print("Getting nodes for which to avoid trimming") namesToKeep = set() if os.path.exists(keptNamesFile): - with open(keptNamesFile) as file: # Contains names with an image (incl linked), desc, or reduced-tree-presence + with open(keptNamesFile) as file: # Contains names with an image, desc, or reduced-tree-presence for line in file: namesToKeep.add(line.rstrip()) else: print("WARNING: No '{}' file found".format(keptNamesFile)) +print("Read in {} nodes".format(len(namesToKeep))) +keptAncestors = set() +for name in namesToKeep: + if name in nameToFirstId: + ids = [nameToFirstId[name]] if name not in dupNameToIds else dupNameToIds[name] + for id in ids: + parentId = nodeMap[id]["parent"] + while parentId != None: + parentObj = nodeMap[parentId] + keptAncestors.add(parentObj["name"]) + parentId = parentObj["parent"] +oldNamesToKeepSz = len(namesToKeep) +namesToKeep.update(keptAncestors) +print("Added {} ancestor nodes".format(len(namesToKeep) - oldNamesToKeepSz)) +print("Trimming nodes from tree") def trimChildren(nodeId): """ Traverse node tree, looking for 
nodes with too many children """ nodeObj = nodeMap[nodeId] -- cgit v1.2.3