aboutsummaryrefslogtreecommitdiff
path: root/backend/tol_data/gen_linked_imgs.py
blob: 7002e9213827913ae96b88b6db5e836a54571b9c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/python3

"""
Looks for nodes without images in the database, and tries to
associate them with images from their children.
"""

import re
import sqlite3

DB_FILE = 'data.db'
# Matches compound node names, which have the form '[name1 + name2]'
COMPOUND_NAME_REGEX = re.compile(r'\[(.+) \+ (.+)]')
# If True, the ID pair given to a compound node is also written to each successive ancestor
# whose chosen candidate's tips-val does not exceed that of the node below it
UP_PROPAGATE_COMPOUND_IMGS = False

def _getNodesWithImgs(dbCur: sqlite3.Cursor) -> dict[str, str]:
	""" Returns a map from the name of each node that has a 'node_imgs' row to that node's ID """
	query = 'SELECT nodes.name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name'
	return {name: otolId for name, otolId in dbCur.execute(query)}

def _getNodeDepths(
		dbCur: sqlite3.Cursor, nodeNames: list[str]) -> 'tuple[dict[str, int], dict[str, str | None]]':
	"""
	Walks from each named node up through the 'edges' table, and returns
	(map from node name to depth, map from node name to parent name).
	Both maps cover the named nodes and all of their ancestors.
	A node without a parent edge has depth 0 and a parent of None.
	"""
	nodeToDepth: dict[str, int] = {}
	nodeToParent: dict[str, str | None] = {}
	for startName in nodeNames:
		nodeChain = [startName] # The starting node, followed by successive ancestors
		baseDepth = 0 # Depth of the last node in the chain
		nodeName = startName
		# Add ancestors, stopping at a parentless node or at an ancestor whose depth is already known
		while True:
			row = dbCur.execute('SELECT parent FROM edges WHERE child = ?', (nodeName,)).fetchone()
			if row is None:
				nodeToParent[nodeName] = None
				break
			parent = row[0]
			nodeToParent[nodeName] = parent
			nodeChain.append(parent)
			if parent in nodeToDepth:
				baseDepth = nodeToDepth[parent]
				break
			nodeName = parent
		# Add depths, from the topmost ancestor in the chain down to the starting node
		for depth, name in enumerate(reversed(nodeChain), baseDepth):
			nodeToDepth[name] = depth
	return nodeToDepth, nodeToParent

def _linkAncestors(
		dbCur: sqlite3.Cursor,
		nodeToUsedId: dict[str, str],
		nodeToDepth: dict[str, int],
		nodeToParent: 'dict[str, str | None]') -> dict[str, tuple[str, int]]:
	"""
	Visits nodes from deepest to shallowest. Each imageless parent gets, as its candidate, the child
	with the largest 'tips' value (the first one visited wins ties). When the parent itself is visited,
	it takes on the candidate's ID, which is recorded in 'nodeToUsedId' and in the 'linked_imgs' table.
	Returns a map from each such parent's name to (candidate child name, candidate tips-val).
	"""
	depthToNodes: dict[int, list[str]] = {}
	for nodeName, depth in nodeToDepth.items():
		depthToNodes.setdefault(depth, []).append(nodeName)
	parentToCandidate: dict[str, tuple[str, int]] = {}
	iterNum = 0
	for depth in sorted(depthToNodes, reverse=True):
		for node in depthToNodes[depth]:
			iterNum += 1
			if iterNum % 10000 == 0:
				print(f'At iteration {iterNum}')
			# All of this node's children were visited at the previous depth, so its candidate is final
			if node in parentToCandidate:
				nodeToUsedId[node] = nodeToUsedId[parentToCandidate[node][0]]
				dbCur.execute('INSERT INTO linked_imgs VALUES (?, ?)', (node, nodeToUsedId[node]))
			# Offer this node as a candidate for its parent, unless the parent already has an ID to use
			parent = nodeToParent[node]
			if parent is None or parent in nodeToUsedId:
				continue
			(tips,) = dbCur.execute('SELECT tips FROM nodes WHERE name == ?', (node,)).fetchone()
			if parent not in parentToCandidate or parentToCandidate[parent][1] < tips:
				parentToCandidate[parent] = (node, tips)
	return parentToCandidate

def _replaceCompoundLinks(
		dbCur: sqlite3.Cursor,
		nodeToUsedId: dict[str, str],
		nodeToParent: 'dict[str, str | None]',
		parentToCandidate: dict[str, tuple[str, int]]) -> None:
	"""
	For each linked node with a compound name '[name1 + name2]', replaces its 'linked_imgs' entry
	with 'id1,id2', holding the IDs used by the two sub-named nodes (an absent ID is left empty).
	If neither sub-named node has an ID, the entry is deleted instead.
	"""
	for iterNum, node in enumerate(parentToCandidate, 1):
		if iterNum % 10000 == 0:
			print(f'At iteration {iterNum}')
		#
		match = COMPOUND_NAME_REGEX.fullmatch(node)
		if match is None:
			continue
		# Replace associated image with subname images
		otolIdPair = [nodeToUsedId.get(subName, '') for subName in match.group(1, 2)]
		# Use no image if both subimages not found
		if otolIdPair == ['', '']:
			dbCur.execute('DELETE FROM linked_imgs WHERE name = ?', (node,))
			continue
		# Add to db
		otolIds = ','.join(otolIdPair)
		dbCur.execute('UPDATE linked_imgs SET otol_ids = ? WHERE name = ?', (otolIds, node))
		# Possibly repeat operation upon parent/ancestors
		if UP_PROPAGATE_COMPOUND_IMGS:
			while True:
				parent = nodeToParent[node]
				if parent is None:
					break
				(tips,) = dbCur.execute('SELECT tips from nodes WHERE name = ?', (node,)).fetchone()
				# Stop once an ancestor's candidate has a larger tips-val than the node below it
				if parent not in parentToCandidate or parentToCandidate[parent][1] > tips:
					break
				# Replace associated image
				dbCur.execute('UPDATE linked_imgs SET otol_ids = ? WHERE name = ?', (otolIds, parent))
				node = parent

def genData(dbFile: str) -> None:
	"""
	Creates and fills the 'linked_imgs' table (name, otol_ids) in the database at 'dbFile'.

	Reads the 'nodes', 'node_imgs', and 'edges' tables. Each ancestor of a node with an image,
	that lacks an image itself, gets linked to the ID used by its child with the most tips.
	Linked nodes with compound names get a comma-separated pair of IDs instead (or no entry).

	Raises sqlite3.OperationalError if the 'linked_imgs' table already exists.
	The connection is closed even upon failure, in which case pending row changes are not committed.
	"""
	print('Opening database')
	dbCon = sqlite3.connect(dbFile)
	try:
		dbCur = dbCon.cursor()
		dbCur.execute('CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, otol_ids TEXT)')
		#
		print('Getting nodes with images')
		nodeToUsedId = _getNodesWithImgs(dbCur) # Maps name of node to otol ID of node to use image for
		print(f'Found {len(nodeToUsedId)}')
		#
		print('Getting node depths')
		nodeToDepth, nodeToParent = _getNodeDepths(dbCur, list(nodeToUsedId))
		#
		print('Finding ancestors to give linked images')
		parentToCandidate = _linkAncestors(dbCur, nodeToUsedId, nodeToDepth, nodeToParent)
		#
		print('Replacing linked-images for compound nodes')
		_replaceCompoundLinks(dbCur, nodeToUsedId, nodeToParent, parentToCandidate)
		#
		print('Closing database')
		dbCon.commit()
	finally:
		dbCon.close()

if __name__ == '__main__':
	import argparse
	# No options are defined, but parsing still provides '--help' (which shows the module docstring
	# with its line breaks kept), and rejects unexpected arguments
	argParser = argparse.ArgumentParser(
		formatter_class=argparse.RawDescriptionHelpFormatter,
		description=__doc__)
	argParser.parse_args()
	#
	genData(DB_FILE)