diff options
Diffstat (limited to 'backend/data')
| -rw-r--r-- | backend/data/README.md | 3 | ||||
| -rwxr-xr-x | backend/data/genLinkedImgs.py | 52 |
2 files changed, 26 insertions, 29 deletions
diff --git a/backend/data/README.md b/backend/data/README.md index 174c262..7c03d9e 100644 --- a/backend/data/README.md +++ b/backend/data/README.md @@ -36,9 +36,8 @@ File Generation Process and outputs choice information into mergedImgList.txt. 7 Run genImgsForWeb.py, which creates cropped/resized images in img/, using mergedImgList.txt, and adds 'images' and 'node_imgs' tables to data.db. - 8 Run genLinkedImgs.py to add a 'linked_imgs' table to data.db, - which uses 'nodes', 'edges', 'eol\_ids', and 'images', to associate + which uses 'nodes', 'edges', 'eol\_ids', and 'node_imgs', to associate nodes without images to child images. 5 Reduced Tree Structure Data 1 Run genReducedTreeData.py, which adds 'r_nodes' and 'r_edges' tables to diff --git a/backend/data/genLinkedImgs.py b/backend/data/genLinkedImgs.py index 6e26ca4..4ad1a8d 100755 --- a/backend/data/genLinkedImgs.py +++ b/backend/data/genLinkedImgs.py @@ -17,16 +17,14 @@ upPropagateCompoundImgs = False # Open db dbCon = sqlite3.connect(dbFile) dbCur = dbCon.cursor() -dbCur.execute("CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, eol_id INT, eol_id2 INT)") - # The eol_id2 column is used to provide the second part of a compound-image +dbCur.execute("CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, otol_id TEXT, otol_id2 TEXT)") + # Associates a node with one (or two) otol-ids with usable images # Get nodes with images print("Getting nodes with images") -resolvedNodes = {} # Will map node names to eol IDs with a usable image -query = "SELECT nodes.name, eol_ids.id FROM" \ - " nodes INNER JOIN eol_ids ON nodes.name = eol_ids.name" \ - " INNER JOIN images ON eol_ids.id = images.eol_id" -for (name, eolId) in dbCur.execute(query): - resolvedNodes[name] = eolId +resolvedNodes = {} # Will map node names to otol IDs with a usable image +query = "SELECT name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.id = node_imgs.id" +for (name, otolId) in dbCur.execute(query): + resolvedNodes[name] = otolId print(f"Got {len(resolvedNodes)} nodes") # Iterate through resolved nodes, resolving ancestors where able print("Resolving ancestor nodes") @@ -39,8 +37,8 @@ while len(resolvedNodes) > 0: if iterNum % 1e3 == 0: print(f"At iteration {iterNum}") # Get next node - (nodeName, eolId) = resolvedNodes.popitem() - processedNodes[nodeName] = eolId + (nodeName, otolId) = resolvedNodes.popitem() + processedNodes[nodeName] = otolId # Traverse upwards, resolving ancestors if able while True: # Get parent @@ -52,7 +50,7 @@ while len(resolvedNodes) > 0: if parent not in nodesToResolve: childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE node = ?", (parent,))] query = "SELECT name, tips FROM nodes WHERE name IN ({})".format(",".join(["?"] * len(childNames))) - childObjs = [{"name": row[0], "tips": row[1], "eolId": None} for row in dbCur.execute(query, childNames)] + childObjs = [{"name": row[0], "tips": row[1], "otolId": None} for row in dbCur.execute(query, childNames)] childObjs.sort(key=lambda x: x["tips"], reverse=True) nodesToResolve[parent] = childObjs else: @@ -60,24 +58,24 @@ while len(resolvedNodes) > 0: # Check if highest-tips child if (childObjs[0]["name"] == nodeName): # Resolve parent, and continue from it - dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?, ?)", (parent, eolId, None)) + dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?, ?)", (parent, otolId, None)) del nodesToResolve[parent] - processedNodes[parent] = eolId + processedNodes[parent] = otolId parentToChosenTips[parent] = childObjs[0]["tips"] nodeName = parent continue else: - # Add potential EOL ID to parent + # Add potential otol-id childObj = next(c for c in childObjs if c["name"] == nodeName) - childObj["eolId"] = eolId + childObj["otolId"] = otolId break - # When out of resolved nodes, resolve any nodesToResolve nodes + # When out of resolved nodes, resolve nodesToResolve nodes, possibly adding more nodes to resolve if len(resolvedNodes) == 0: for (name, childObjs) in nodesToResolve.items(): - childObj = next(c for c in childObjs if c["eolId"] != None) - resolvedNodes[name] = childObj["eolId"] + childObj = next(c for c in childObjs if c["otolId"] != None) + resolvedNodes[name] = childObj["otolId"] parentToChosenTips[name] = childObj["tips"] - dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?, ?)", (name, childObj["eolId"], None)) + dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?, ?)", (name, childObj["otolId"], None)) nodesToResolve.clear() # Iterate through processed nodes with compound names print("Replacing images for compound-name nodes") @@ -91,18 +89,18 @@ for nodeName in processedNodes.keys(): if match != None: # Replace associated image with subname images (subName1, subName2) = match.group(1,2) - eolIdPair = [0, 0] + otolIdPair = [0, 0] if subName1 in processedNodes: - eolIdPair[0] = processedNodes[subName1] + otolIdPair[0] = processedNodes[subName1] if subName2 in processedNodes: - eolIdPair[1] = processedNodes[subName2] + otolIdPair[1] = processedNodes[subName2] # Use no image if both subimages not found - if eolIdPair[0] == 0 and eolIdPair[1] == 0: + if otolIdPair[0] == 0 and otolIdPair[1] == 0: dbCur.execute("DELETE FROM linked_imgs WHERE name = ?", (nodeName,)) continue # Add to db - dbCur.execute("UPDATE linked_imgs SET eol_id = ?, eol_id2 = ? WHERE name = ?", - (eolIdPair[0], eolIdPair[1], nodeName,)) + dbCur.execute("UPDATE linked_imgs SET otol_id = ?, otol_id2 = ? WHERE name = ?", + (otolIdPair[0], otolIdPair[1], nodeName)) # Possibly repeat operation upon parent/ancestors if upPropagateCompoundImgs: while True: @@ -114,8 +112,8 @@ for nodeName in processedNodes.keys(): (numTips,) = dbCur.execute("SELECT tips from nodes WHERE name = ?", (nodeName,)).fetchone() if parent in parentToChosenTips and parentToChosenTips[parent] <= numTips: # Replace associated image - dbCur.execute("UPDATE linked_imgs SET eol_id = ?, eol_id2 = ? WHERE name = ?", - (eolIdPair[0], eolIdPair[1], parent)) + dbCur.execute("UPDATE linked_imgs SET otol_id = ?, otol_id2 = ? WHERE name = ?", + (otolIdPair[0], otolIdPair[1], parent)) nodeName = parent continue break |
