diff options
Diffstat (limited to 'backend/data/genLinkedImgs.py')
| -rwxr-xr-x | backend/data/genLinkedImgs.py | 125 |
1 files changed, 0 insertions, 125 deletions
#!/usr/bin/python3
"""Link image-less tree nodes to images that belong to their descendants.

Reads the `nodes`, `edges` and `node_imgs` tables of the database, and
creates and fills a `linked_imgs` table that maps a node name to the ID(s)
of the node(s) whose image should be shown for it.
"""

import re
import sqlite3
import sys

usageInfo = f"""
Usage: {sys.argv[0]}

Look for nodes without images in the database, and tries to
associate them with images from their children.
"""

dbFile = "data.db"
compoundNameRegex = re.compile(r"\[(.+) \+ (.+)]")
upPropagateCompoundImgs = False


def getChildObjs(dbCur, parent):
    """Return the children of `parent`, ordered by descending tip count.

    Each child is a dict with keys "name", "tips" and "otolId"; "otolId"
    starts as None and is filled in once an image is found for that child.
    """
    # A sub-select is used instead of binding one '?' per child, so parents
    # with very many children cannot exceed SQLite's bound-variable limit.
    query = "SELECT name, tips FROM nodes WHERE name IN (SELECT child FROM edges WHERE parent = ?)"
    childObjs = [
        {"name": name, "tips": tips, "otolId": None}
        for (name, tips) in dbCur.execute(query, (parent,))
    ]
    childObjs.sort(key=lambda c: c["tips"], reverse=True)
    return childObjs


def resolveAncestorImgs(dbCur):
    """Propagate images upward from nodes that have one to ancestors that don't.

    Inserts a row into `linked_imgs` for every ancestor that gets resolved.

    Returns a pair (processedNodes, parentToChosenTips):
      processedNodes maps a node name to the ID of the node whose image it uses.
      parentToChosenTips maps a resolved parent's name to the tip count of
      the child whose image was chosen for it.
    """
    print("Getting nodes with images")
    query = "SELECT nodes.name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name"
    # Maps node names to otol IDs with a usable image
    resolvedNodes = {name: otolId for (name, otolId) in dbCur.execute(query)}
    print(f"Found {len(resolvedNodes)}")

    print("Iterating through nodes, trying to resolve images for ancestors")
    nodesToResolve = {}  # Maps a node name to its child objects (candidate images)
    processedNodes = {}  # Maps a node name to the otol ID whose image is to be used
    parentToChosenTips = {}  # Used to prefer images from children with more tips
    iterNum = 0
    while resolvedNodes:
        iterNum += 1
        if iterNum % 1000 == 0:
            print(f"At iteration {iterNum}")
        # Get next node
        (nodeName, otolId) = resolvedNodes.popitem()
        processedNodes[nodeName] = otolId
        # Traverse upwards, resolving ancestors if able
        while True:
            row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (nodeName,)).fetchone()
            if row is None:
                break
            parent = row[0]
            if parent in processedNodes or parent in resolvedNodes:
                break
            # Get parent data
            childObjs = nodesToResolve.get(parent)
            if childObjs is None:
                childObjs = getChildObjs(dbCur, parent)
                nodesToResolve[parent] = childObjs
            if childObjs[0]["name"] != nodeName:
                # Not the highest-tips child: only mark it as a potential choice
                childObj = next(c for c in childObjs if c["name"] == nodeName)
                childObj["otolId"] = otolId
                break
            # Highest-tips child: resolve parent, and continue from it
            dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (parent, otolId))
            del nodesToResolve[parent]
            processedNodes[parent] = otolId
            parentToChosenTips[parent] = childObjs[0]["tips"]
            nodeName = parent
        # When out of resolved nodes, settle for the best marked child of each
        # pending parent, which may make further ancestors resolvable
        if not resolvedNodes:
            for (name, childObjs) in nodesToResolve.items():
                childObj = next(c for c in childObjs if c["otolId"] is not None)
                resolvedNodes[name] = childObj["otolId"]
                parentToChosenTips[name] = childObj["tips"]
                dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (name, childObj["otolId"]))
            nodesToResolve.clear()
    return (processedNodes, parentToChosenTips)


def replaceCompoundImgs(dbCur, processedNodes, parentToChosenTips, upPropagate=False):
    """Give compound nodes (named "[A + B]") the images of their two sub-names.

    The `linked_imgs` row of a compound node is updated to "idA,idB", where a
    missing side is left empty, or deleted if neither sub-name has an image.
    If `upPropagate` is true, the pair is also written to ancestors whose
    chosen child had no more tips than the node being propagated from.
    """
    print("Replacing linked-images for compound nodes")
    iterNum = 0
    for nodeName in processedNodes:
        iterNum += 1
        if iterNum % 10000 == 0:
            print(f"At iteration {iterNum}")
        match = compoundNameRegex.fullmatch(nodeName)
        if match is None:
            continue
        # Replace associated image with subname images
        (subName1, subName2) = match.group(1, 2)
        otolId1 = processedNodes.get(subName1, "")
        otolId2 = processedNodes.get(subName2, "")
        # Use no image if both subimages not found
        if otolId1 == "" and otolId2 == "":
            dbCur.execute("DELETE FROM linked_imgs WHERE name = ?", (nodeName,))
            continue
        otolIds = f"{otolId1},{otolId2}"
        dbCur.execute("UPDATE linked_imgs SET otol_ids = ? WHERE name = ?", (otolIds, nodeName))
        if not upPropagate:
            continue
        # Repeat operation upon parent/ancestors
        child = nodeName
        while True:
            row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (child,)).fetchone()
            if row is None:
                break
            parent = row[0]
            (numTips,) = dbCur.execute("SELECT tips from nodes WHERE name = ?", (child,)).fetchone()
            if parent not in parentToChosenTips or parentToChosenTips[parent] > numTips:
                break
            dbCur.execute("UPDATE linked_imgs SET otol_ids = ? WHERE name = ?", (otolIds, parent))
            child = parent


def genLinkedImgs(dbCon, upPropagate=False):
    """Create and fill the `linked_imgs` table using connection `dbCon`.

    Does not commit or close the connection; that is left to the caller.
    Raises sqlite3.OperationalError if `linked_imgs` already exists.
    """
    dbCur = dbCon.cursor()
    dbCur.execute("CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, otol_ids TEXT)")
    (processedNodes, parentToChosenTips) = resolveAncestorImgs(dbCur)
    replaceCompoundImgs(dbCur, processedNodes, parentToChosenTips, upPropagate)


def main():
    """Script entry point: check arguments, then process the database in `dbFile`."""
    if len(sys.argv) > 1:
        print(usageInfo, file=sys.stderr)
        sys.exit(1)
    print("Opening databases")
    dbCon = sqlite3.connect(dbFile)
    try:
        genLinkedImgs(dbCon, upPropagateCompoundImgs)
        print("Closing databases")
        dbCon.commit()
    finally:
        # Release the connection even when processing fails part-way
        dbCon.close()


if __name__ == "__main__":
    main()
