From 60acfb1dbccb265896b4734cbf701fbb6132f3c5 Mon Sep 17 00:00:00 2001
From: Terry Truong
Date: Tue, 7 Jun 2022 12:57:05 +1000
Subject: Add more manual-correction for generated otol-data

---
 backend/data/genOtolData.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'backend')

diff --git a/backend/data/genOtolData.py b/backend/data/genOtolData.py
index cc2d854..5a89270 100755
--- a/backend/data/genOtolData.py
+++ b/backend/data/genOtolData.py
@@ -31,6 +31,7 @@ dbFile = "data.db"
 nodeMap = {} # Maps node IDs to node objects
 nameToFirstId = {} # Maps node names to first found ID (names might have multiple IDs)
 dupNameToIds = {} # Maps names of nodes with multiple IDs to those node IDs
+pickedDupsFile = "genOtolDataPickedDups.txt"
 softChildLimit = 100
 keptNamesFile = "genOtolNamesToKeep.txt" # Contains names to keep when doing node trimming
 
@@ -216,18 +217,27 @@ def deleteDownward(nodeId):
 	del nodeMap[nodeId]
 	# return tipsRemoved
 
-trimChildren(rootId)
+#trimChildren(rootId)
 # Resolve duplicate names
 print("Resolving duplicates")
+nameToPickedId = {}
+with open(pickedDupsFile) as file:
+	for line in file:
+		(name, _, otolId) = line.rstrip().partition("|")
+		nameToPickedId[name] = otolId
 for [dupName, ids] in dupNameToIds.items():
-	# Get conflicting node with most tips
-	tipNums = [nodeMap[id]["tips"] for id in ids]
-	maxIdx = tipNums.index(max(tipNums))
-	maxId = ids[maxIdx]
+	# Check for picked id
+	if dupName in nameToPickedId:
+		idToUse = nameToPickedId[dupName]
+	else:
+		# Get conflicting node with most tips
+		tipNums = [nodeMap[id]["tips"] for id in ids]
+		maxIdx = tipNums.index(max(tipNums))
+		idToUse = ids[maxIdx]
 	# Adjust name of other conflicting nodes
 	counter = 2
 	for id in ids:
-		if id != maxId:
+		if id != idToUse:
 			nodeMap[id]["name"] += " [" + str(counter)+ "]"
 			counter += 1
 	# Change mrca* names
-- 
cgit v1.2.3