about | summary | refs | log | tree | commit | diff
path: root/backend/tolData/genLinkedImgs.py
diff options
context:
space:
mode:
author Terry Truong <terry06890@gmail.com> 2022-09-07 11:37:37 +1000
committer Terry Truong <terry06890@gmail.com> 2022-09-07 11:37:37 +1000
commit daccbbd9c73a5292ea9d6746560d7009e5aa666d (patch)
tree 9156bf011ab6302eb3c0d219d40587d594f51841 /backend/tolData/genLinkedImgs.py
parent 1a7fe33edafa68a6f759d124bdeee673ff9cf9ff (diff)
Add python type annotations
Also use consistent quote symbols
Also use 'is None' instead of '== None'
Also use 'if list1' instead of 'if len(list1) > 0'
Diffstat (limited to 'backend/tolData/genLinkedImgs.py')
-rwxr-xr-x backend/tolData/genLinkedImgs.py 102
1 file changed, 52 insertions, 50 deletions
diff --git a/backend/tolData/genLinkedImgs.py b/backend/tolData/genLinkedImgs.py
index eb991b9..6d2feff 100755
--- a/backend/tolData/genLinkedImgs.py
+++ b/backend/tolData/genLinkedImgs.py
@@ -1,6 +1,6 @@
#!/usr/bin/python3
-import sys, re
+import re
import sqlite3
import argparse
@@ -10,113 +10,115 @@ associate them with images from their children
""", formatter_class=argparse.RawDescriptionHelpFormatter)
parser.parse_args()
-dbFile = "data.db"
-compoundNameRegex = re.compile(r"\[(.+) \+ (.+)]")
+dbFile = 'data.db'
+compoundNameRegex = re.compile(r'\[(.+) \+ (.+)]')
upPropagateCompoundImgs = False
-print("Opening databases")
+print('Opening databases')
dbCon = sqlite3.connect(dbFile)
dbCur = dbCon.cursor()
-dbCur.execute("CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, otol_ids TEXT)")
+dbCur.execute('CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, otol_ids TEXT)')
-print("Getting nodes with images")
-resolvedNodes = {} # Will map node names to otol IDs with a usable image
-query = "SELECT nodes.name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name"
-for (name, otolId) in dbCur.execute(query):
+print('Getting nodes with images')
+resolvedNodes: dict[str, str] = {} # Will map node names to otol IDs with a usable image
+query = 'SELECT nodes.name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name'
+for name, otolId in dbCur.execute(query):
resolvedNodes[name] = otolId
-print(f"Found {len(resolvedNodes)}")
+print(f'Found {len(resolvedNodes)}')
-print("Iterating through nodes, trying to resolve images for ancestors")
-nodesToResolve = {} # Maps a node name to a list of objects that represent possible child images
-processedNodes = {} # Map a node name to an OTOL ID, representing a child node whose image is to be used
-parentToChosenTips = {} # Used to prefer images from children with more tips
+print('Iterating through nodes, trying to resolve images for ancestors')
+nodesToResolve: dict[str, list[dict[str, str | int | None]]] = {}
+ # Maps a node name to a list of objects that represent possible child images
+processedNodes: dict[str, str] = {} # Map a node name to an OTOL ID, representing a child node whose image is to be used
+parentToChosenTips: dict[str, int] = {} # Used to prefer images from children with more tips
iterNum = 0
-while len(resolvedNodes) > 0:
+while resolvedNodes:
iterNum += 1
if iterNum % 1e3 == 0:
- print(f"At iteration {iterNum}")
+ print(f'At iteration {iterNum}')
# Get next node
- (nodeName, otolId) = resolvedNodes.popitem()
+ nodeName, otolId = resolvedNodes.popitem()
processedNodes[nodeName] = otolId
# Traverse upwards, resolving ancestors if able
while True:
# Get parent
- row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (nodeName,)).fetchone()
- if row == None or row[0] in processedNodes or row[0] in resolvedNodes:
+ row = dbCur.execute('SELECT parent FROM edges WHERE child = ?', (nodeName,)).fetchone()
+ if row is None or row[0] in processedNodes or row[0] in resolvedNodes:
break
- parent = row[0]
+ parent: str = row[0]
# Get parent data
if parent not in nodesToResolve:
- childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE parent = ?", (parent,))]
- query = "SELECT name, tips FROM nodes WHERE name IN ({})".format(",".join(["?"] * len(childNames)))
- childObjs = [{"name": row[0], "tips": row[1], "otolId": None} for row in dbCur.execute(query, childNames)]
- childObjs.sort(key=lambda x: x["tips"], reverse=True)
+ childNames: list[str] = [
+ row[0] for row in dbCur.execute('SELECT child FROM edges WHERE parent = ?', (parent,))]
+ query = 'SELECT name, tips FROM nodes WHERE name IN ({})'.format(','.join(['?'] * len(childNames)))
+ childObjs = [{'name': row[0], 'tips': row[1], 'otolId': None} for row in dbCur.execute(query, childNames)]
+ childObjs.sort(key=lambda x: x['tips'], reverse=True)
nodesToResolve[parent] = childObjs
else:
childObjs = nodesToResolve[parent]
# Check if highest-tips child
- if (childObjs[0]["name"] == nodeName):
+ if childObjs[0]['name'] == nodeName:
# Resolve parent, and continue from it
- dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (parent, otolId))
+ dbCur.execute('INSERT INTO linked_imgs VALUES (?, ?)', (parent, otolId))
del nodesToResolve[parent]
processedNodes[parent] = otolId
- parentToChosenTips[parent] = childObjs[0]["tips"]
+ parentToChosenTips[parent] = childObjs[0]['tips']
nodeName = parent
continue
else:
# Mark child as a potential choice
- childObj = next(c for c in childObjs if c["name"] == nodeName)
- childObj["otolId"] = otolId
+ childObj = next(c for c in childObjs if c['name'] == nodeName)
+ childObj['otolId'] = otolId
break
# When out of resolved nodes, resolve nodesToResolve nodes, possibly adding more nodes to resolve
- if len(resolvedNodes) == 0:
- for (name, childObjs) in nodesToResolve.items():
- childObj = next(c for c in childObjs if c["otolId"] != None)
- resolvedNodes[name] = childObj["otolId"]
- parentToChosenTips[name] = childObj["tips"]
- dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (name, childObj["otolId"]))
+ if not resolvedNodes:
+ for name, childObjs in nodesToResolve.items():
+ childObj = next(c for c in childObjs if c['otolId'] is not None)
+ resolvedNodes[name] = childObj['otolId']
+ parentToChosenTips[name] = childObj['tips']
+ dbCur.execute('INSERT INTO linked_imgs VALUES (?, ?)', (name, childObj['otolId']))
nodesToResolve.clear()
-print("Replacing linked-images for compound nodes")
+print('Replacing linked-images for compound nodes')
iterNum = 0
for nodeName in processedNodes.keys():
iterNum += 1
if iterNum % 1e4 == 0:
- print(f"At iteration {iterNum}")
+ print(f'At iteration {iterNum}')
#
match = compoundNameRegex.fullmatch(nodeName)
- if match != None:
+ if match is not None:
# Replace associated image with subname images
- (subName1, subName2) = match.group(1,2)
- otolIdPair = ["", ""]
+ subName1, subName2 = match.group(1,2)
+ otolIdPair = ['', '']
if subName1 in processedNodes:
otolIdPair[0] = processedNodes[subName1]
if subName2 in processedNodes:
otolIdPair[1] = processedNodes[subName2]
# Use no image if both subimages not found
- if otolIdPair[0] == "" and otolIdPair[1] == "":
- dbCur.execute("DELETE FROM linked_imgs WHERE name = ?", (nodeName,))
+ if otolIdPair[0] == '' and otolIdPair[1] == '':
+ dbCur.execute('DELETE FROM linked_imgs WHERE name = ?', (nodeName,))
continue
# Add to db
- dbCur.execute("UPDATE linked_imgs SET otol_ids = ? WHERE name = ?",
- (otolIdPair[0] + "," + otolIdPair[1], nodeName))
+ dbCur.execute('UPDATE linked_imgs SET otol_ids = ? WHERE name = ?',
+ (otolIdPair[0] + ',' + otolIdPair[1], nodeName))
# Possibly repeat operation upon parent/ancestors
if upPropagateCompoundImgs:
while True:
# Get parent
- row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (nodeName,)).fetchone()
- if row != None:
+ row = dbCur.execute('SELECT parent FROM edges WHERE child = ?', (nodeName,)).fetchone()
+ if row is not None:
parent = row[0]
# Check num tips
- (numTips,) = dbCur.execute("SELECT tips from nodes WHERE name = ?", (nodeName,)).fetchone()
+ (numTips,) = dbCur.execute('SELECT tips from nodes WHERE name = ?', (nodeName,)).fetchone()
if parent in parentToChosenTips and parentToChosenTips[parent] <= numTips:
# Replace associated image
- dbCur.execute("UPDATE linked_imgs SET otol_ids = ? WHERE name = ?",
- (otolIdPair[0] + "," + otolIdPair[1], parent))
+ dbCur.execute('UPDATE linked_imgs SET otol_ids = ? WHERE name = ?',
+ (otolIdPair[0] + ',' + otolIdPair[1], parent))
nodeName = parent
continue
break
-print("Closing databases")
+print('Closing databases')
dbCon.commit()
dbCon.close()