1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
#!/usr/bin/python3
"""
Looks for nodes without images in the database, and tries to
associate them with images from their children
"""
import re
import sqlite3
# Path of the SQLite database file that is passed to genData() when this file is run as a script
DB_FILE = 'data.db'
#
# Matches a whole node name of the form '[SUBNAME1 + SUBNAME2]', capturing the two subnames as groups 1 and 2
COMPOUND_NAME_REGEX = re.compile(r'\[(.+) \+ (.+)]')
# If True, genData() also writes a compound node's image-ID pair to successive ancestors, for as long as
# the ancestor's chosen candidate child has at most as many tips as the node below it
UP_PROPAGATE_COMPOUND_IMGS = False
def genData(dbFile: str) -> None:
    """
    Create and fill the 'linked_imgs' table of the SQLite database at dbFile.

    Nodes listed in 'node_imgs' are treated as having their own image. Every
    ancestor of such a node (found by following the 'edges' table upward) that
    has no image of its own gets a row in 'linked_imgs', holding the ID used by
    one of its children: the child with the largest 'tips' value among those
    that have (or were given) an image. For ancestors whose name matches
    COMPOUND_NAME_REGEX, that row is then replaced by the comma-joined pair of
    IDs used by the two named subnodes (a missing one is left empty), or
    deleted if neither subnode has an ID.

    Progress messages are printed to stdout.

    Args:
        dbFile: Path of the SQLite database. It is expected to contain the
            tables 'nodes' (name, id, tips), 'edges' (parent, child) and
            'node_imgs' (name).

    Raises:
        sqlite3.Error: If a query fails, e.g. because 'linked_imgs' already
            exists. The connection is closed before the error propagates.
    """
    print('Opening database')
    dbCon = sqlite3.connect(dbFile)
    # Ensure the connection is released even if a query below fails;
    # close() does not commit, so only a fully successful run is committed
    try:
        dbCur = dbCon.cursor()
        dbCur.execute('CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, otol_ids TEXT)')
        #
        print('Getting nodes with images')
        query = 'SELECT nodes.name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name'
        # Maps name of node to otol ID of node to use image for
        nodeToUsedId: dict[str, str] = dict(dbCur.execute(query))
        print(f'Found {len(nodeToUsedId)}')
        #
        print('Getting node depths')
        nodeToDepth: dict[str, int] = {}
        maxDepth = 0
        nodeToParent: dict[str, str | None] = {} # Maps name of node to name of parent
        for startName in nodeToUsedId:
            nodeName = startName
            nodeChain = [nodeName]
            lastDepth = 0
            # Add ancestors, stopping at a root or at an ancestor whose depth is already known
            while True:
                row = dbCur.execute('SELECT parent FROM edges WHERE child = ?', (nodeName,)).fetchone()
                if row is None:
                    nodeToParent[nodeName] = None
                    break
                nodeToParent[nodeName] = row[0]
                nodeName = row[0]
                nodeChain.append(nodeName)
                if nodeName in nodeToDepth:
                    lastDepth = nodeToDepth[nodeName]
                    break
            # Add depths (the chain runs from the node upward, so walk it in reverse)
            for offset, chainNode in enumerate(reversed(nodeChain)):
                nodeToDepth[chainNode] = offset + lastDepth
            maxDepth = max(maxDepth, lastDepth + len(nodeChain) - 1)
        #
        print('Finding ancestors to give linked images')
        depthToNodes: dict[int, list[str]] = {depth: [] for depth in range(maxDepth + 1)}
        for nodeName, depth in nodeToDepth.items():
            depthToNodes[depth].append(nodeName)
        # Maps parent node name to candidate child name and tips-val
        parentToCandidate: dict[str, tuple[str, int]] = {}
        iterNum = 0
        # Deepest nodes first, so a node's candidate child is final before the node itself is visited
        for depth in range(maxDepth, -1, -1):
            for node in depthToNodes[depth]:
                iterNum += 1
                if iterNum % 10_000 == 0:
                    print(f'At iteration {iterNum}')
                #
                if node in parentToCandidate:
                    nodeToUsedId[node] = nodeToUsedId[parentToCandidate[node][0]]
                    dbCur.execute('INSERT INTO linked_imgs VALUES (?, ?)', (node, nodeToUsedId[node]))
                parent = nodeToParent[node]
                if parent is None or parent in nodeToUsedId:
                    continue
                # Offer this node as the parent's image source, preferring the child with more tips
                (tips,) = dbCur.execute('SELECT tips FROM nodes WHERE name == ?', (node,)).fetchone()
                if parent not in parentToCandidate or parentToCandidate[parent][1] < tips:
                    parentToCandidate[parent] = (node, tips)
        #
        print('Replacing linked-images for compound nodes')
        for iterNum, compoundNode in enumerate(parentToCandidate, 1):
            if iterNum % 10_000 == 0:
                print(f'At iteration {iterNum}')
            #
            match = COMPOUND_NAME_REGEX.fullmatch(compoundNode)
            if match is None:
                continue
            # Replace associated image with subname images
            subName1, subName2 = match.group(1, 2)
            otolIdPair = [nodeToUsedId.get(subName1, ''), nodeToUsedId.get(subName2, '')]
            # Use no image if both subimages not found
            if otolIdPair[0] == '' and otolIdPair[1] == '':
                dbCur.execute('DELETE FROM linked_imgs WHERE name = ?', (compoundNode,))
                continue
            # Add to db
            joinedIds = ','.join(otolIdPair)
            dbCur.execute('UPDATE linked_imgs SET otol_ids = ? WHERE name = ?', (joinedIds, compoundNode))
            # Possibly repeat operation upon parent/ancestors
            if UP_PROPAGATE_COMPOUND_IMGS:
                node = compoundNode
                while True:
                    parent = nodeToParent[node]
                    if parent is None:
                        break
                    (tips,) = dbCur.execute('SELECT tips from nodes WHERE name = ?', (node,)).fetchone()
                    if parent not in parentToCandidate or parentToCandidate[parent][1] > tips:
                        break
                    # Replace associated image
                    dbCur.execute('UPDATE linked_imgs SET otol_ids = ? WHERE name = ?', (joinedIds, parent))
                    node = parent
        #
        print('Closing database')
        dbCon.commit()
    finally:
        dbCon.close()
if __name__ == '__main__':
    # No options are defined: parsing only provides -h/--help (showing the module
    # docstring verbatim) and rejects unexpected command-line arguments
    import argparse
    argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    ).parse_args()
    #
    genData(DB_FILE)
|