diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-05-28 11:30:32 +1000 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-05-28 11:30:32 +1000 |
| commit | 9f0dcdea1049a59cd1fb4f0872edee1b7a87a4e6 (patch) | |
| tree | 3bb51b1bf2fd4ce1f25522b60b1dfbb17d8a4811 /backend/data | |
| parent | 14f20c9e4e27bf0e57b06c7251e17bef9ad10c67 (diff) | |
Fix tree-trimming to not discard certain nodes
Was trimming off nodes without an image/desc/reduced_tree_presence,
including those with a descendant that has them. Was using a
linked-image association to prevent this, but this wasn't reliable.
Diffstat (limited to 'backend/data')
| -rw-r--r-- | backend/data/README.md | 6 | ||||
| -rwxr-xr-x | backend/data/genOtolData.py | 19 |
2 files changed, 20 insertions, 5 deletions
diff --git a/backend/data/README.md b/backend/data/README.md index d444e4f..4655c2d 100644 --- a/backend/data/README.md +++ b/backend/data/README.md @@ -65,6 +65,6 @@ Other Files of possibly-significant nodes are removed, using a short-sighted heuristic. <br> One way to generate this list is to generate the files as usual, - then get node names that have an associated image, linked-image, - description, or presence in r_nodes. Then run the genOtolData.py - and genEolNameData.py scripts again. + then get node names that have an associated image, description, or + presence in r_nodes. Then run the genOtolData.py and genEolNameData.py + scripts again (after deleting their created tables). diff --git a/backend/data/genOtolData.py b/backend/data/genOtolData.py index 252e9f2..87db2c4 100755 --- a/backend/data/genOtolData.py +++ b/backend/data/genOtolData.py @@ -141,14 +141,29 @@ def parseNewickName(): return [match.group(1).replace("_", " "), match.group(2)] rootId = parseNewick() # For nodes with *many* children, remove some of those children -print("Trimming nodes from tree") +print("Getting nodes for which to avoid trimming") namesToKeep = set() if os.path.exists(keptNamesFile): - with open(keptNamesFile) as file: # Contains names with an image (incl linked), desc, or reduced-tree-presence + with open(keptNamesFile) as file: # Contains names with an image, desc, or reduced-tree-presence for line in file: namesToKeep.add(line.rstrip()) else: print("WARNING: No '{}' file found".format(keptNamesFile)) +print("Read in {} nodes".format(len(namesToKeep))) +keptAncestors = set() +for name in namesToKeep: + if name in nameToFirstId: + ids = [nameToFirstId[name]] if name not in dupNameToIds else dupNameToIds[name] + for id in ids: + parentId = nodeMap[id]["parent"] + while parentId != None: + parentObj = nodeMap[parentId] + keptAncestors.add(parentObj["name"]) + parentId = parentObj["parent"] +oldNamesToKeepSz = len(namesToKeep) +namesToKeep.update(keptAncestors) +print("Added 
{} ancestor nodes".format(len(namesToKeep) - oldNamesToKeepSz)) +print("Trimming nodes from tree") def trimChildren(nodeId): """ Traverse node tree, looking for nodes with too many children """ nodeObj = nodeMap[nodeId] |
