diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-09-07 11:37:37 +1000 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-09-07 11:37:37 +1000 |
| commit | daccbbd9c73a5292ea9d6746560d7009e5aa666d (patch) | |
| tree | 9156bf011ab6302eb3c0d219d40587d594f51841 /backend/tolData/genLinkedImgs.py | |
| parent | 1a7fe33edafa68a6f759d124bdeee673ff9cf9ff (diff) | |
Add python type annotations
Also use consistent quote symbols
Also use 'is None' instead of '== None'
Also use 'if list1' instead of 'if len(list1) > 0'
Diffstat (limited to 'backend/tolData/genLinkedImgs.py')
| -rwxr-xr-x | backend/tolData/genLinkedImgs.py | 102 |
1 files changed, 52 insertions, 50 deletions
```diff
diff --git a/backend/tolData/genLinkedImgs.py b/backend/tolData/genLinkedImgs.py
index eb991b9..6d2feff 100755
--- a/backend/tolData/genLinkedImgs.py
+++ b/backend/tolData/genLinkedImgs.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python3
 
-import sys, re
+import re
 import sqlite3
 
 import argparse
@@ -10,113 +10,115 @@ associate them with images from their children
 """, formatter_class=argparse.RawDescriptionHelpFormatter)
 parser.parse_args()
 
-dbFile = "data.db"
-compoundNameRegex = re.compile(r"\[(.+) \+ (.+)]")
+dbFile = 'data.db'
+compoundNameRegex = re.compile(r'\[(.+) \+ (.+)]')
 upPropagateCompoundImgs = False
 
-print("Opening databases")
+print('Opening databases')
 dbCon = sqlite3.connect(dbFile)
 dbCur = dbCon.cursor()
-dbCur.execute("CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, otol_ids TEXT)")
+dbCur.execute('CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, otol_ids TEXT)')
 
-print("Getting nodes with images")
-resolvedNodes = {} # Will map node names to otol IDs with a usable image
-query = "SELECT nodes.name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name"
-for (name, otolId) in dbCur.execute(query):
+print('Getting nodes with images')
+resolvedNodes: dict[str, str] = {} # Will map node names to otol IDs with a usable image
+query = 'SELECT nodes.name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name'
+for name, otolId in dbCur.execute(query):
 	resolvedNodes[name] = otolId
-print(f"Found {len(resolvedNodes)}")
+print(f'Found {len(resolvedNodes)}')
 
-print("Iterating through nodes, trying to resolve images for ancestors")
-nodesToResolve = {} # Maps a node name to a list of objects that represent possible child images
-processedNodes = {} # Map a node name to an OTOL ID, representing a child node whose image is to be used
-parentToChosenTips = {} # Used to prefer images from children with more tips
+print('Iterating through nodes, trying to resolve images for ancestors')
+nodesToResolve: dict[str, list[dict[str, str | int | None]]] = {}
+	# Maps a node name to a list of objects that represent possible child images
+processedNodes: dict[str, str] = {} # Map a node name to an OTOL ID, representing a child node whose image is to be used
+parentToChosenTips: dict[str, int] = {} # Used to prefer images from children with more tips
 iterNum = 0
-while len(resolvedNodes) > 0:
+while resolvedNodes:
 	iterNum += 1
 	if iterNum % 1e3 == 0:
-		print(f"At iteration {iterNum}")
+		print(f'At iteration {iterNum}')
 	# Get next node
-	(nodeName, otolId) = resolvedNodes.popitem()
+	nodeName, otolId = resolvedNodes.popitem()
 	processedNodes[nodeName] = otolId
 	# Traverse upwards, resolving ancestors if able
 	while True:
 		# Get parent
-		row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (nodeName,)).fetchone()
-		if row == None or row[0] in processedNodes or row[0] in resolvedNodes:
+		row = dbCur.execute('SELECT parent FROM edges WHERE child = ?', (nodeName,)).fetchone()
+		if row is None or row[0] in processedNodes or row[0] in resolvedNodes:
 			break
-		parent = row[0]
+		parent: str = row[0]
 		# Get parent data
 		if parent not in nodesToResolve:
-			childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE parent = ?", (parent,))]
-			query = "SELECT name, tips FROM nodes WHERE name IN ({})".format(",".join(["?"] * len(childNames)))
-			childObjs = [{"name": row[0], "tips": row[1], "otolId": None} for row in dbCur.execute(query, childNames)]
-			childObjs.sort(key=lambda x: x["tips"], reverse=True)
+			childNames: list[str] = [
+				row[0] for row in dbCur.execute('SELECT child FROM edges WHERE parent = ?', (parent,))]
+			query = 'SELECT name, tips FROM nodes WHERE name IN ({})'.format(','.join(['?'] * len(childNames)))
+			childObjs = [{'name': row[0], 'tips': row[1], 'otolId': None} for row in dbCur.execute(query, childNames)]
+			childObjs.sort(key=lambda x: x['tips'], reverse=True)
 			nodesToResolve[parent] = childObjs
 		else:
 			childObjs = nodesToResolve[parent]
 		# Check if highest-tips child
-		if (childObjs[0]["name"] == nodeName):
+		if childObjs[0]['name'] == nodeName:
 			# Resolve parent, and continue from it
-			dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (parent, otolId))
+			dbCur.execute('INSERT INTO linked_imgs VALUES (?, ?)', (parent, otolId))
 			del nodesToResolve[parent]
 			processedNodes[parent] = otolId
-			parentToChosenTips[parent] = childObjs[0]["tips"]
+			parentToChosenTips[parent] = childObjs[0]['tips']
 			nodeName = parent
 			continue
 		else:
 			# Mark child as a potential choice
-			childObj = next(c for c in childObjs if c["name"] == nodeName)
-			childObj["otolId"] = otolId
+			childObj = next(c for c in childObjs if c['name'] == nodeName)
+			childObj['otolId'] = otolId
 			break
 	# When out of resolved nodes, resolve nodesToResolve nodes, possibly adding more nodes to resolve
-	if len(resolvedNodes) == 0:
-		for (name, childObjs) in nodesToResolve.items():
-			childObj = next(c for c in childObjs if c["otolId"] != None)
-			resolvedNodes[name] = childObj["otolId"]
-			parentToChosenTips[name] = childObj["tips"]
-			dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (name, childObj["otolId"]))
+	if not resolvedNodes:
+		for name, childObjs in nodesToResolve.items():
+			childObj = next(c for c in childObjs if c['otolId'] is not None)
+			resolvedNodes[name] = childObj['otolId']
+			parentToChosenTips[name] = childObj['tips']
+			dbCur.execute('INSERT INTO linked_imgs VALUES (?, ?)', (name, childObj['otolId']))
 		nodesToResolve.clear()
 
-print("Replacing linked-images for compound nodes")
+print('Replacing linked-images for compound nodes')
 iterNum = 0
 for nodeName in processedNodes.keys():
 	iterNum += 1
 	if iterNum % 1e4 == 0:
-		print(f"At iteration {iterNum}")
+		print(f'At iteration {iterNum}')
 	#
 	match = compoundNameRegex.fullmatch(nodeName)
-	if match != None:
+	if match is not None:
 		# Replace associated image with subname images
-		(subName1, subName2) = match.group(1,2)
-		otolIdPair = ["", ""]
+		subName1, subName2 = match.group(1,2)
+		otolIdPair = ['', '']
 		if subName1 in processedNodes:
 			otolIdPair[0] = processedNodes[subName1]
 		if subName2 in processedNodes:
 			otolIdPair[1] = processedNodes[subName2]
 		# Use no image if both subimages not found
-		if otolIdPair[0] == "" and otolIdPair[1] == "":
-			dbCur.execute("DELETE FROM linked_imgs WHERE name = ?", (nodeName,))
+		if otolIdPair[0] == '' and otolIdPair[1] == '':
+			dbCur.execute('DELETE FROM linked_imgs WHERE name = ?', (nodeName,))
 			continue
 		# Add to db
-		dbCur.execute("UPDATE linked_imgs SET otol_ids = ? WHERE name = ?",
-			(otolIdPair[0] + "," + otolIdPair[1], nodeName))
+		dbCur.execute('UPDATE linked_imgs SET otol_ids = ? WHERE name = ?',
+			(otolIdPair[0] + ',' + otolIdPair[1], nodeName))
 		# Possibly repeat operation upon parent/ancestors
 		if upPropagateCompoundImgs:
 			while True:
 				# Get parent
-				row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (nodeName,)).fetchone()
-				if row != None:
+				row = dbCur.execute('SELECT parent FROM edges WHERE child = ?', (nodeName,)).fetchone()
+				if row is not None:
 					parent = row[0]
 					# Check num tips
-					(numTips,) = dbCur.execute("SELECT tips from nodes WHERE name = ?", (nodeName,)).fetchone()
+					(numTips,) = dbCur.execute('SELECT tips from nodes WHERE name = ?', (nodeName,)).fetchone()
 					if parent in parentToChosenTips and parentToChosenTips[parent] <= numTips:
 						# Replace associated image
-						dbCur.execute("UPDATE linked_imgs SET otol_ids = ? WHERE name = ?",
-							(otolIdPair[0] + "," + otolIdPair[1], parent))
+						dbCur.execute('UPDATE linked_imgs SET otol_ids = ? WHERE name = ?',
+							(otolIdPair[0] + ',' + otolIdPair[1], parent))
 						nodeName = parent
 						continue
 				break
 
-print("Closing databases")
+print('Closing databases')
 dbCon.commit()
 dbCon.close()
```
