Refactor backend scripts

author: Terry Truong <terry06890@gmail.com> 2022-06-22 23:16:42 +1000
committer: Terry Truong <terry06890@gmail.com> 2022-06-22 23:16:42 +1000
commit: abb936f5d76f7fe5cec1e8948d287da86643d504 (patch)
tree: f07b9eaadf5ae91363fdbac9d81b74e1fb0a436f
parent: e78c4df403e5f98afa08f7a0841ff233d5f6d05b (diff)
25 files changed, 876 insertions, 721 deletions
diff --git a/backend/data/README.md b/backend/data/README.md
index 7d1adad..f5b35f0 100644
--- a/backend/data/README.md
+++ b/backend/data/README.md
@@ -1,17 +1,50 @@
 This directory holds files used to generate data.db, which contains tree-of-life data.
 
 # Tables:
--   `nodes`:       `name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT`
--   `edges`:       `node TEXT, child TEXT, p_support INT, PRIMARY KEY (node, child)`
--   `eol_ids`:     `id INT PRIMARY KEY, name TEXT`
--   `names`:       `name TEXT, alt_name TEXT, pref_alt INT, src TEXT, PRIMARY KEY(name, alt_name)`
--   `wiki_ids`:    `name TEXT PRIMARY KEY, id INT, redirected INT`
--   `descs`:       `wiki_id INT PRIMARY KEY, desc TEXT, from_dbp INT`
--   `node_imgs`:   `name TEXT PRIMARY KEY, img_id INT, src TEXT`
--   `images`:      `id INT, src TEXT, url TEXT, license TEXT, artist TEXT, credit TEXT, PRIMARY KEY (id, src)`
--   `linked_imgs`: `name TEXT PRIMARY KEY, otol_ids TEXT`
--   `r_nodes`:     `name TEXT PRIMARY KEY, tips INT`
--   `r_edges`:     `node TEXT, child TEXT, p_support INT, PRIMARY KEY (node, child)`
+## Tree Structure data
+-   `nodes` <br>
+    Format: `name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT` <br>
+    Represents a tree-of-life node. `tips` is the number of leaf (childless) descendants.
+-   `edges` <br>
+    Format: `parent TEXT, child TEXT, p_support INT, PRIMARY KEY (parent, child)` <br>
+    `p_support` is 1 if the edge has 'phylogenetic support', and 0 otherwise.
+## Node name data
+-   `eol_ids` <br>
+    Format: `id INT PRIMARY KEY, name TEXT` <br>
+    Associates an EOL ID with a node's name.
+-   `names` <br>
+    Format: `name TEXT, alt_name TEXT, pref_alt INT, src TEXT, PRIMARY KEY(name, alt_name)` <br>
+    Associates a node with alternative names.
+    `pref_alt` is 1 if the alt-name is the most 'preferred' one.
+    `src` indicates the dataset the alt-name was obtained from (can be 'eol', 'enwiki', or 'picked').
+## Node description data
+-   `wiki_ids` <br>
+    Format: `name TEXT PRIMARY KEY, id INT, redirected INT` <br>
+    Associates a node with a wikipedia page ID.
+    `redirected` is 1 if the node was associated with a different page that redirected to this one.
+-   `descs` <br>
+    Format: `wiki_id INT PRIMARY KEY, desc TEXT, from_dbp INT` <br>
+    Associates a wikipedia page ID with a short-description.
+    `from_dbp` is 1 if the description was obtained from DBpedia, and 0 otherwise.
+## Node image data
+-   `node_imgs` <br>
+    Format: `name TEXT PRIMARY KEY, img_id INT, src TEXT` <br>
+    Associates a node with an image.
+-   `images` <br>
+    Format: `id INT, src TEXT, url TEXT, license TEXT, artist TEXT, credit TEXT, PRIMARY KEY (id, src)` <br>
+    Represents an image, identified by a source ('eol', 'enwiki', or 'picked'), and a source-specific ID.
+-   `linked_imgs` <br>
+    Format: `name TEXT PRIMARY KEY, otol_ids TEXT` <br>
+    Associates a node with an image from another node.
+    `otol_ids` can be a single otol ID, or two comma-separated values that are each an otol ID or an empty string.
+        The latter form is used for compound nodes.
+## Reduced-tree data
+-   `r_nodes` <br>
+    Format: `name TEXT PRIMARY KEY, tips INT` <br>
+    Like `nodes`, but for a reduced tree.
+-   `r_edges` <br>
+    Format: `node TEXT, child TEXT, p_support INT, PRIMARY KEY (node, child)` <br>
+    Like `edges` but for a reduced tree.
 
 # Generating the Database
 
@@ -68,7 +101,7 @@ Some of the python scripts require third-party packages:
     -   pickedEnwikiNamesToSkip.txt: Same as with genDbpData.py.
     -   pickedEnwikiLabels.txt: Similar to pickedDbpLabels.txt.
 
-## Generate image data
+## Generate node image data
 ### Get images from EOL
 1.  Obtain 'image metadata files' in eol/, as specified in it's README.
 2.  In eol/, run downloadImgs.py, which downloads images (possibly multiple per node),
@@ -81,7 +114,7 @@ Some of the python scripts require third-party packages:
     using the `wiki_ids` table, and stores them in a database.
 2.  In enwiki/, run downloadImgLicenseInfo.py, which downloads licensing information for
     those images, using wikipedia's online API.
-3.  In enwiki/, run downloadEnwikiImgs.py, which downloads 'permissively-licensed'
+3.  In enwiki/, run downloadImgs.py, which downloads 'permissively-licensed'
     images into enwiki/imgs/.
 ### Merge the image sets
 1.  Run reviewImgsToGen.py, which displays images from eol/imgs/ and enwiki/imgs/,
@@ -107,15 +140,16 @@ Some of the python scripts require third-party packages:
     `nodes`, `edges`, and `node_imgs` tables.
 
 ## Do some post-processing
-1.  Run genReducedTreeData.py, which generates a second, reduced version of the tree,
-    adding the `r_nodes` and `r_edges` tables, using `nodes` and `names`. Reads from
-    pickedReducedNodes.txt, which lists names of nodes that must be included (1 per line).
-2.  Optionally run trimTree.py, which tries to remove some 'low-significance' nodes,
-    for the sake of performance and result-relevance. Otherwise, some nodes may have
-    over 10k children, which can take a while to render (over a minute in my testing).
-    You might want to backup the untrimmed tree first, as this operation is not easily
-    reversible.
-3.  Optionally run genEnwikiNameData.py, which adds more entries to the `names` table,
+1.  Run genEnwikiNameData.py, which adds more entries to the `names` table,
     using data in enwiki/, and the `names` and `wiki_ids` tables.
-4.  Optionally run addPickedNames.py, which allows adding manually-selected name data to
+2.  Optionally run addPickedNames.py, which allows adding manually-selected name data to
     the `names` table, as specified in pickedNames.txt.
+    -   pickedNames.txt: Has lines of the form `nodeName1|altName1|prefAlt1`.
+        These correspond to entries in the `names` table. `prefAlt` should be 1 or 0.
+        A line like `name1|name1|1` causes a node to have no preferred alt-name.
+3.  Run genReducedTreeData.py, which generates a second, reduced version of the tree,
+    adding the `r_nodes` and `r_edges` tables, using `nodes` and `names`. Reads from
+    pickedReducedNodes.txt, which lists names of nodes that must be included (1 per line).
+4.  Optionally run trimTree.py, which tries to remove some 'low significance' nodes,
+    for the sake of performance and content-relevance. Otherwise, some nodes may have
+    over 10k children, which can take a while to render (took over a minute in testing).
diff --git a/backend/data/addPickedNames.py b/backend/data/addPickedNames.py
index 3ef099a..d56a0cb 100755
--- a/backend/data/addPickedNames.py
+++ b/backend/data/addPickedNames.py
@@ -3,12 +3,11 @@
 import sys
 import sqlite3
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Reads alt-name data from a file, and adds it to the 'names' table.\n"
-usageInfo += "The file is expected to have lines of the form: nodeName|altName|prefAlt\n"
-usageInfo += "    These correspond to entries in the 'names' table. 'prefAlt' should\n"
-usageInfo += "    be 1 or 0. A line may specify name1|name1|1, which causes the node\n"
-usageInfo += "    to have no preferred alt-name.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads alt-name data from a file, and adds it to the database's 'names' table.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
@@ -16,15 +15,21 @@ if len(sys.argv) > 1:
 dbFile = "data.db"
 pickedNamesFile = "pickedNames.txt"
 
-# Open db
+print("Opening database")
 dbCon = sqlite3.connect(dbFile)
 dbCur = dbCon.cursor()
-# Iterate through picked-names file
+
+print("Iterating through picked-names file")
 with open(pickedNamesFile) as file:
 	for line in file:
 		# Get record data
-		(nodeName, altName, prefAlt) = line.lower().rstrip().split("|")
+		nodeName, altName, prefAlt = line.lower().rstrip().split("|")
 		prefAlt = int(prefAlt)
+		# Check whether there exists a node with the name
+		row = dbCur.execute("SELECT name from nodes where name = ?", (nodeName,)).fetchone()
+		if row == None:
+			print(f"ERROR: No node with name \"{nodeName}\" exists")
+			break
 		# Remove any existing preferred-alt status
 		if prefAlt == 1:
 			query = "SELECT name, alt_name FROM names WHERE name = ? AND pref_alt = 1"
@@ -46,6 +51,7 @@ with open(pickedNamesFile) as file:
 				print(f"Updating record for alt-name {altName} for {nodeName}")
 				dbCur.execute("UPDATE names SET pref_alt = ?, src = 'picked' WHERE name = ? AND alt_name = ?",
 					(prefAlt, nodeName, altName))
-# Close db
+
+print("Closing database")
 dbCon.commit()
 dbCon.close()
diff --git a/backend/data/dbpedia/genDescData.py b/backend/data/dbpedia/genDescData.py
index bba3ff5..d9e8a80 100755
--- a/backend/data/dbpedia/genDescData.py
+++ b/backend/data/dbpedia/genDescData.py
@@ -3,25 +3,28 @@
 import sys, re
 import bz2, sqlite3
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Reads DBpedia labels/types/abstracts/etc data,\n"
-usageInfo += "and creates a sqlite db containing that data.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Adds DBpedia labels/types/abstracts/etc data into a database.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
 
-labelsFile = "labels_lang=en.ttl.bz2" # Has about 16e6 lines
+labelsFile = "labels_lang=en.ttl.bz2" # Had about 16e6 entries
 idsFile = "page_lang=en_ids.ttl.bz2"
 redirectsFile = "redirects_lang=en_transitive.ttl.bz2"
 disambigFile = "disambiguations_lang=en.ttl.bz2"
 typesFile = "instance-types_lang=en_specific.ttl.bz2"
 abstractsFile = "short-abstracts_lang=en.ttl.bz2"
 dbFile = "descData.db"
+# In testing, this script took a few hours to run, and generated about 10GB
 
-# Open db
+print("Creating database")
 dbCon = sqlite3.connect(dbFile)
 dbCur = dbCon.cursor()
-# Read/store labels
+
 print("Reading/storing label data")
 dbCur.execute("CREATE TABLE labels (iri TEXT PRIMARY KEY, label TEXT)")
 dbCur.execute("CREATE INDEX labels_idx ON labels(label)")
@@ -32,16 +35,13 @@ with bz2.open(labelsFile, mode='rt') as file:
 	for line in file:
 		lineNum += 1
 		if lineNum % 1e5 == 0:
-			print(f"Processing line {lineNum}")
+			print(f"At line {lineNum}")
 		#
 		match = labelLineRegex.fullmatch(line)
 		if match == None:
-			print(f"ERROR: Line {lineNum} has unexpected format", file=sys.stderr)
-			sys.exit(1)
-		else:
-			dbCur.execute("INSERT INTO labels VALUES (?, ?)", (match.group(1), match.group(2)))
-dbCon.commit()
-# Read/store wiki page ids
+			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
+		dbCur.execute("INSERT INTO labels VALUES (?, ?)", (match.group(1), match.group(2)))
+
 print("Reading/storing wiki page ids")
 dbCur.execute("CREATE TABLE ids (iri TEXT PRIMARY KEY, id INT)")
 idLineRegex = re.compile(r'<([^>]+)> <[^>]+> "(\d+)".*\n')
@@ -50,20 +50,17 @@ with bz2.open(idsFile, mode='rt') as file:
 	for line in file:
 		lineNum += 1
 		if lineNum % 1e5 == 0:
-			print(f"Processing line {lineNum}")
+			print(f"At line {lineNum}")
 		#
 		match = idLineRegex.fullmatch(line)
 		if match == None:
-			print(f"ERROR: Line {lineNum} has unexpected format", file=sys.stderr)
-			sys.exit(1)
-		else:
-			try:
-				dbCur.execute("INSERT INTO ids VALUES (?, ?)", (match.group(1), int(match.group(2))))
-			except sqlite3.IntegrityError as e:
-				# Accounts for certain lines that have the same IRI
-				print(f"Failed to add entry with IRI \"{match.group(1)}\": {e}")
-dbCon.commit()
-# Read/store redirects
+			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
+		try:
+			dbCur.execute("INSERT INTO ids VALUES (?, ?)", (match.group(1), int(match.group(2))))
+		except sqlite3.IntegrityError as e:
+			# Accounts for certain lines that have the same IRI
+			print(f"WARNING: Failed to add entry with IRI \"{match.group(1)}\": {e}")
+
 print("Reading/storing redirection data")
 dbCur.execute("CREATE TABLE redirects (iri TEXT PRIMARY KEY, target TEXT)")
 redirLineRegex = re.compile(r'<([^>]+)> <[^>]+> <([^>]+)> \.\n')
@@ -72,37 +69,28 @@ with bz2.open(redirectsFile, mode='rt') as file:
 	for line in file:
 		lineNum += 1
 		if lineNum % 1e5 == 0:
-			print(f"Processing line {lineNum}")
+			print(f"At line {lineNum}")
 		#
 		match = redirLineRegex.fullmatch(line)
 		if match == None:
-			print(f"ERROR: Line {lineNum} has unexpected format", file=sys.stderr)
-			sys.exit(1)
-		else:
-			dbCur.execute("INSERT INTO redirects VALUES (?, ?)", (match.group(1), match.group(2)))
-dbCon.commit()
-# Read/store diambiguation-page data
+			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
+		dbCur.execute("INSERT INTO redirects VALUES (?, ?)", (match.group(1), match.group(2)))
+
 print("Reading/storing diambiguation-page data")
-disambigNames = set()
+dbCur.execute("CREATE TABLE disambiguations (iri TEXT PRIMARY KEY)")
 disambigLineRegex = redirLineRegex
 lineNum = 0
 with bz2.open(disambigFile, mode='rt') as file:
 	for line in file:
 		lineNum += 1
 		if lineNum % 1e5 == 0:
-			print(f"Processing line {lineNum}")
+			print(f"At line {lineNum}")
 		#
 		match = disambigLineRegex.fullmatch(line)
 		if match == None:
-			print(f"ERROR: Line {lineNum} has unexpected format", file=sys.stderr)
-			sys.exit(1)
-		else:
-			disambigNames.add(match.group(1))
-dbCur.execute("CREATE TABLE disambiguations (iri TEXT PRIMARY KEY)")
-for name in disambigNames:
-	dbCur.execute("INSERT INTO disambiguations VALUES (?)", (name,))
-dbCon.commit()
-# Read/store instance-type
+			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
+		dbCur.execute("INSERT OR IGNORE INTO disambiguations VALUES (?)", (match.group(1),))
+
 print("Reading/storing instance-type data")
 dbCur.execute("CREATE TABLE types (iri TEXT, type TEXT)")
 dbCur.execute("CREATE INDEX types_iri_idx ON types(iri)")
@@ -112,16 +100,13 @@ with bz2.open(typesFile, mode='rt') as file:
 	for line in file:
 		lineNum += 1
 		if lineNum % 1e5 == 0:
-			print(f"Processing line {lineNum}")
+			print(f"At line {lineNum}")
 		#
 		match = typeLineRegex.fullmatch(line)
 		if match == None:
-			print(f"ERROR: Line {lineNum} has unexpected format", file=sys.stderr)
-			sys.exit(1)
-		else:
-			dbCur.execute("INSERT INTO types VALUES (?, ?)", (match.group(1), match.group(2)))
-dbCon.commit()
-# Read/store abstracts
+			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
+		dbCur.execute("INSERT INTO types VALUES (?, ?)", (match.group(1), match.group(2)))
+
 print("Reading/storing abstracts")
 dbCur.execute("CREATE TABLE abstracts (iri TEXT PRIMARY KEY, abstract TEXT)")
 descLineRegex = labelLineRegex
@@ -130,17 +115,16 @@ with bz2.open(abstractsFile, mode='rt') as file:
 	for line in file:
 		lineNum += 1
 		if lineNum % 1e5 == 0:
-			print(f"Processing line {lineNum}")
+			print(f"At line {lineNum}")
 		#
 		if line[0] == "#":
 			continue
 		match = descLineRegex.fullmatch(line)
 		if match == None:
-			print(f"ERROR: Line {lineNum} has unexpected format", file=sys.stderr)
-			sys.exit(1)
-		else:
-			dbCur.execute("INSERT INTO abstracts VALUES (?, ?)",
-				(match.group(1), match.group(2).replace(r'\"', '"')))
-# Close db
+			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
+		dbCur.execute("INSERT INTO abstracts VALUES (?, ?)",
+			(match.group(1), match.group(2).replace(r'\"', '"')))
+
+print("Closing database")
 dbCon.commit()
 dbCon.close()
diff --git a/backend/data/enwiki/README.md b/backend/data/enwiki/README.md
index 1c16a2e..90d16c7 100644
--- a/backend/data/enwiki/README.md
+++ b/backend/data/enwiki/README.md
@@ -42,7 +42,7 @@ This directory holds files obtained from/using [English Wikipedia](https://en.wi
         `img_name` may be null, which means 'none found', and is used to avoid re-processing page-ids.
     -   `imgs`: `name TEXT PRIMARY KEY, license TEXT, artist TEXT, credit TEXT, restrictions TEXT, url TEXT` <br>
         Might lack some matches for `img_name` in `page_imgs`, due to licensing info unavailability.
--   downloadEnwikiImgs.py <br>
+-   downloadImgs.py <br>
     Used to download image files into imgs/.
 
 # Other Files
diff --git a/backend/data/enwiki/downloadImgLicenseInfo.py b/backend/data/enwiki/downloadImgLicenseInfo.py
index 097304b..399922e 100755
--- a/backend/data/enwiki/downloadImgLicenseInfo.py
+++ b/backend/data/enwiki/downloadImgLicenseInfo.py
@@ -5,41 +5,48 @@ import sqlite3, urllib.parse, html
 import requests
 import time, signal
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Reads image names from a file, and uses enwiki's API to obtain\n"
-usageInfo += "licensing information for them, adding the info to a sqlite db.\n"
-usageInfo += "\n"
-usageInfo += "SIGINT causes the program to finish an ongoing download and exit.\n"
-usageInfo += "The program can be re-run to continue downloading, and looks\n"
-usageInfo += "at names added to the db to decide what to skip.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads image names from a database, and uses enwiki's online API to obtain
+licensing information for them, adding the info to the database.
+
+SIGINT causes the program to finish an ongoing download and exit.
+The program can be re-run to continue downloading, and looks
+at already-processed names to decide what to skip.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
 
-imgDb = "imgData.db" # About 130k image names
+imgDb = "imgData.db"
 apiUrl = "https://en.wikipedia.org/w/api.php"
+userAgent = "terryt.dev (terry06890@gmail.com)"
 batchSz = 50 # Max 50
 tagRegex = re.compile(r"<[^<]+>")
 whitespaceRegex = re.compile(r"\s+")
 
-# Open db
+print("Opening database")
 dbCon = sqlite3.connect(imgDb)
 dbCur = dbCon.cursor()
 dbCur2 = dbCon.cursor()
-# Create table if it doesn't exist
+print("Checking for table")
 if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='imgs'").fetchone() == None:
 	dbCur.execute("CREATE TABLE imgs(" \
 		"name TEXT PRIMARY KEY, license TEXT, artist TEXT, credit TEXT, restrictions TEXT, url TEXT)")
-# Get image names
+
 print("Reading image names")
 imgNames = set()
 for (imgName,) in dbCur.execute("SELECT DISTINCT img_name FROM page_imgs WHERE img_name NOT NULL"):
 	imgNames.add(imgName)
-print(f"Found {len(imgNames)} images")
+print(f"Found {len(imgNames)}")
+
+print("Checking for already-processed images")
 oldSz = len(imgNames)
 for (imgName,) in dbCur.execute("SELECT name FROM imgs"):
 	imgNames.discard(imgName)
-print(f"Skipping {oldSz - len(imgNames)} already-done images")
+print(f"Found {oldSz - len(imgNames)}")
+
 # Set SIGINT handler
 interrupted = False
 oldHandler = None
@@ -48,7 +55,8 @@ def onSigint(sig, frame):
 	interrupted = True
 	signal.signal(signal.SIGINT, oldHandler)
 oldHandler = signal.signal(signal.SIGINT, onSigint)
-# Iterate through image names, making API requests
+
+print("Iterating through image names")
 imgNames = list(imgNames)
 iterNum = 0
 for i in range(0, len(imgNames), batchSz):
@@ -63,7 +71,7 @@ for i in range(0, len(imgNames), batchSz):
 	imgBatch = ["File:" + x for x in imgBatch]
 	# Make request
 	headers = {
-		"user-agent": "terryt.dev (terry06890@gmail.com)",
+		"user-agent": userAgent,
 		"accept-encoding": "gzip",
 	}
 	params = {
@@ -80,16 +88,16 @@ for i in range(0, len(imgNames), batchSz):
 		response = requests.get(apiUrl, params=params, headers=headers)
 		responseObj = response.json()
 	except Exception as e:
-		print(f"Error while downloading info: {e}", file=sys.stderr)
-		print(f"\tImage batch: " + "|".join(imgBatch), file=sys.stderr)
+		print(f"ERROR: Exception while downloading info: {e}")
+		print(f"\tImage batch: " + "|".join(imgBatch))
 		continue
 	# Parse response-object
 	if "query" not in responseObj or "pages" not in responseObj["query"]:
-		print("WARNING: Response object for doesn't have page data", file=sys.stderr)
-		print("\tImage batch: " + "|".join(imgBatch), file=sys.stderr)
+		print("WARNING: Response object for doesn't have page data")
+		print("\tImage batch: " + "|".join(imgBatch))
 		if "error" in responseObj:
 			errorCode = responseObj["error"]["code"]
-			print(f"\tError code: {errorCode}", file=sys.stderr)
+			print(f"\tError code: {errorCode}")
 			if errorCode == "maxlag":
 				time.sleep(5)
 		continue
@@ -111,10 +119,10 @@ for i in range(0, len(imgNames), batchSz):
 			title = normalisedToInput[title]
 		title = title[5:] # Remove 'File:'
 		if title not in imgNames:
-			print(f"WARNING: Got title \"{title}\" not in image-name list", file=sys.stderr)
+			print(f"WARNING: Got title \"{title}\" not in image-name list")
 			continue
 		if "imageinfo" not in page:
-			print(f"WARNING: No imageinfo section for page \"{title}\"", file=sys.stderr)
+			print(f"WARNING: No imageinfo section for page \"{title}\"")
 			continue
 		metadata = page["imageinfo"][0]["extmetadata"]
 		url = page["imageinfo"][0]["url"]
@@ -122,7 +130,7 @@ for i in range(0, len(imgNames), batchSz):
 		artist = metadata['Artist']['value'] if 'Artist' in metadata else None
 		credit = metadata['Credit']['value'] if 'Credit' in metadata else None
 		restrictions = metadata['Restrictions']['value'] if 'Restrictions' in metadata else None
-		# Remove newlines
+		# Remove markup
 		if artist != None:
 			artist = tagRegex.sub(" ", artist)
 			artist = whitespaceRegex.sub(" ", artist)
@@ -134,7 +142,9 @@ for i in range(0, len(imgNames), batchSz):
 			credit = html.unescape(credit)
 			credit = urllib.parse.unquote(credit)
 		# Add to db
-		dbCur2.execute("INSERT INTO imgs VALUES (?, ?, ?, ?, ?, ?)", (title, license, artist, credit, restrictions, url))
-# Close db
+		dbCur2.execute("INSERT INTO imgs VALUES (?, ?, ?, ?, ?, ?)",
+			(title, license, artist, credit, restrictions, url))
+
+print("Closing database")
 dbCon.commit()
 dbCon.close()
diff --git a/backend/data/enwiki/downloadEnwikiImgs.py b/backend/data/enwiki/downloadImgs.py
index 2929a0d..8fb605f 100755
--- a/backend/data/enwiki/downloadEnwikiImgs.py
+++ b/backend/data/enwiki/downloadImgs.py
@@ -5,13 +5,16 @@ import sqlite3
 import urllib.parse, requests
 import time, signal
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Downloads images from URLs specified in an sqlite db,\n"
-usageInfo += "into a specified directory.'\n"
-usageInfo += "\n"
-usageInfo += "SIGINT causes the program to finish an ongoing download and exit.\n"
-usageInfo += "The program can be re-run to continue downloading, and looks\n"
-usageInfo += "in the output directory do decide what to skip.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Downloads images from URLs in an image database, into an output directory,
+with names of the form 'pageId1.ext1'.
+
+SIGINT causes the program to finish an ongoing download and exit.
+The program can be re-run to continue downloading, and looks
+in the output directory do decide what to skip.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
@@ -19,18 +22,18 @@ if len(sys.argv) > 1:
 imgDb = "imgData.db" # About 130k image names
 outDir = "imgs"
 licenseRegex = re.compile(r"cc0|cc([ -]by)?([ -]sa)?([ -][1234]\.[05])?( \w\w\w?)?", flags=re.IGNORECASE)
+# In testing, this downloaded about 100k images, over several days
 
-# Create output directory if not present
 if not os.path.exists(outDir):
 	os.mkdir(outDir)
-# Get existing image names
-print("Gettings already-downloaded images")
+print("Checking for already-downloaded images")
 fileList = os.listdir(outDir)
 pageIdsDone = set()
 for filename in fileList:
 	(basename, extension) = os.path.splitext(filename)
 	pageIdsDone.add(int(basename))
-print(f"Found {len(pageIdsDone)} already-downloaded images")
+print(f"Found {len(pageIdsDone)}")
+
 # Set SIGINT handler
 interrupted = False
 oldHandler = None
@@ -39,10 +42,10 @@ def onSigint(sig, frame):
 	interrupted = True
 	signal.signal(signal.SIGINT, oldHandler)
 oldHandler = signal.signal(signal.SIGINT, onSigint)
-# Open db
+
+print("Opening database")
 dbCon = sqlite3.connect(imgDb)
 dbCur = dbCon.cursor()
-# Start downloads
 print("Starting downloads")
 iterNum = 0
 query = "SELECT page_id, license, artist, credit, restrictions, url FROM" \
@@ -68,7 +71,7 @@ for (pageId, license, artist, credit, restrictions, url) in dbCur.execute(query)
 	urlParts = urllib.parse.urlparse(url)
 	extension = os.path.splitext(urlParts.path)[1]
 	if len(extension) <= 1:
-		print(f"WARNING: No filename extension found in URL {url}", file=sys.stderr)
+		print(f"WARNING: No filename extension found in URL {url}")
 		sys.exit(1)
 	outFile = f"{outDir}/{pageId}{extension}"
 	headers = {
@@ -81,8 +84,8 @@ for (pageId, license, artist, credit, restrictions, url) in dbCur.execute(query)
 			file.write(response.content)
 		time.sleep(1)
 			# https://en.wikipedia.org/wiki/Wikipedia:Database_download says to "throttle self to 1 cache miss per sec"
-			# It's unclear how to properly check for cache misses, so just do about <=1 per sec
+			# It's unclear how to properly check for cache misses, so this just aims for 1 per sec
 	except Exception as e:
-		print(f"Error while downloading to {outFile}: {e}", file=sys.stderr)
-# Close db
+		print(f"Error while downloading to {outFile}: {e}")
+print("Closing database")
 dbCon.close()
diff --git a/backend/data/enwiki/genDescData.py b/backend/data/enwiki/genDescData.py
index 032dbed..b0ca272 100755
--- a/backend/data/enwiki/genDescData.py
+++ b/backend/data/enwiki/genDescData.py
@@ -5,31 +5,36 @@ import bz2
 import html, mwxml, mwparserfromhell
 import sqlite3
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Reads a Wikimedia enwiki dump, and adds page, redirect,\n"
-usageInfo += "and short-description info to an sqlite db.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads through the wiki dump, and attempts to
+parse short-descriptions, and add them to a database.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
 
-dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2" # 22,034,540 pages
+dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2" # Had about 22e6 pages
 enwikiDb = "descData.db"
+# In testing, this script took over 10 hours to run, and generated about 5GB
 
-# Some regexps and functions for parsing wikitext
 descLineRegex = re.compile("^ *[A-Z'\"]")
 embeddedHtmlRegex = re.compile(r"<[^<]+/>|<!--[^<]+-->|<[^</]+>([^<]*|[^<]*<[^<]+>[^<]*)</[^<]+>|<[^<]+$")
 	# Recognises a self-closing HTML tag, a tag with 0 children, tag with 1 child with 0 children, or unclosed tag
 convertTemplateRegex = re.compile(r"{{convert\|(\d[^|]*)\|(?:(to|-)\|(\d[^|]*)\|)?([a-z][^|}]*)[^}]*}}")
-parensGrpRegex = re.compile(r" \([^()]*\)")
-leftoverBraceRegex = re.compile(r"(?:{\||{{).*")
 def convertTemplateReplace(match):
 	if match.group(2) == None:
 		return f"{match.group(1)} {match.group(4)}"
 	else:
 		return f"{match.group(1)} {match.group(2)} {match.group(3)} {match.group(4)}"
+parensGroupRegex = re.compile(r" \([^()]*\)")
+leftoverBraceRegex = re.compile(r"(?:{\||{{).*")
+
 def parseDesc(text):
-	# Find first matching line outside a {{...}} and [[...]] block-html-comments, then accumulate lines until a blank
-	# Some cases not accounted for: disambiguation pages, abstracts with sentences split-across-lines, 
+	# Find first matching line outside {{...}}, [[...]], and block-html-comment constructs,
+		# and then accumulate lines until a blank one.
+	# Some cases not accounted for include: disambiguation pages, abstracts with sentences split-across-lines, 
 		# nested embedded html, 'content significant' embedded-html, markup not removable with mwparsefromhell, 
 	lines = []
 	openBraceCount = 0
@@ -74,18 +79,15 @@ def removeMarkup(content):
 	content = embeddedHtmlRegex.sub("", content)
 	content = convertTemplateRegex.sub(convertTemplateReplace, content)
 	content = mwparserfromhell.parse(content).strip_code() # Remove wikitext markup
-	content = parensGrpRegex.sub("", content)
+	content = parensGroupRegex.sub("", content)
 	content = leftoverBraceRegex.sub("", content)
 	return content
-# Other helper functions
 def convertTitle(title):
 	return html.unescape(title).replace("_", " ")
 
-# Check for existing db
+print("Creating database")
 if os.path.exists(enwikiDb):
-	print(f"ERROR: Existing {enwikiDb}", file=sys.stderr)
-	sys.exit(1)
-# Create db
+	raise Exception(f"ERROR: Existing {enwikiDb}")
 dbCon = sqlite3.connect(enwikiDb)
 dbCur = dbCon.cursor()
 dbCur.execute("CREATE TABLE pages (id INT PRIMARY KEY, title TEXT UNIQUE)")
@@ -93,8 +95,8 @@ dbCur.execute("CREATE INDEX pages_title_idx ON pages(title COLLATE NOCASE)")
 dbCur.execute("CREATE TABLE redirects (id INT PRIMARY KEY, target TEXT)")
 dbCur.execute("CREATE INDEX redirects_idx ON redirects(target)")
 dbCur.execute("CREATE TABLE descs (id INT PRIMARY KEY, desc TEXT)")
-# Read through dump file
-print("Reading dump file")
+
+print("Iterating through dump file")
 with bz2.open(dumpFile, mode='rt') as file:
 	dump = mwxml.Dump.from_file(file)
 	pageNum = 0
@@ -102,13 +104,15 @@ with bz2.open(dumpFile, mode='rt') as file:
 		pageNum += 1
 		if pageNum % 1e4 == 0:
 			print(f"At page {pageNum}")
+		if pageNum > 3e4:
+			break
 		# Parse page
 		if page.namespace == 0:
 			try:
 				dbCur.execute("INSERT INTO pages VALUES (?, ?)", (page.id, convertTitle(page.title)))
 			except sqlite3.IntegrityError as e:
 				# Accounts for certain pages that have the same title
-				print(f"Failed to add page with title \"{page.title}\": {e}")
+				print(f"Failed to add page with title \"{page.title}\": {e}", file=sys.stderr)
 				continue
 			if page.redirect != None:
 				dbCur.execute("INSERT INTO redirects VALUES (?, ?)", (page.id, convertTitle(page.redirect)))
@@ -117,6 +121,7 @@ with bz2.open(dumpFile, mode='rt') as file:
 				desc = parseDesc(revision.text)
 				if desc != None:
 					dbCur.execute("INSERT INTO descs VALUES (?, ?)", (page.id, desc))
-# Close db
+
+print("Closing database")
 dbCon.commit()
 dbCon.close()
diff --git a/backend/data/enwiki/genDumpIndexDb.py b/backend/data/enwiki/genDumpIndexDb.py
index ee3e813..3955885 100755
--- a/backend/data/enwiki/genDumpIndexDb.py
+++ b/backend/data/enwiki/genDumpIndexDb.py
@@ -4,25 +4,26 @@ import sys, os, re
 import bz2
 import sqlite3
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Reads a Wikimedia enwiki dump index file,\n"
-usageInfo += "and stores it's offset and title data to an sqlite db.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Adds data from the wiki dump index-file into a database.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
 
-indexFile = "enwiki-20220501-pages-articles-multistream-index.txt.bz2" # 22,034,540 lines
+indexFile = "enwiki-20220501-pages-articles-multistream-index.txt.bz2" # Had about 22e6 lines
 indexDb = "dumpIndex.db"
 
-# Check for existing db
 if os.path.exists(indexDb):
-	print(f"ERROR: Existing {indexDb}", file=sys.stderr)
-	sys.exit(1)
-# Create db
+	raise Exception(f"ERROR: Existing {indexDb}")
+print("Creating database")
 dbCon = sqlite3.connect(indexDb)
 dbCur = dbCon.cursor()
 dbCur.execute("CREATE TABLE offsets (title TEXT PRIMARY KEY, id INT UNIQUE, offset INT, next_offset INT)")
-# Reading index file
+
+print("Iterating through index file")
 lineRegex = re.compile(r"([^:]+):([^:]+):(.*)")
 lastOffset = 0
 lineNum = 0
@@ -42,7 +43,7 @@ with bz2.open(indexFile, mode='rt') as file:
 					dbCur.execute("INSERT INTO offsets VALUES (?, ?, ?, ?)", (t, p, lastOffset, offset))
 				except sqlite3.IntegrityError as e:
 					# Accounts for certain entries in the file that have the same title
-					print(f"Failed on title \"{t}\": {e}")
+					print(f"Failed on title \"{t}\": {e}", file=sys.stderr)
 			entriesToAdd = []
 			lastOffset = offset
 		entriesToAdd.append([title, pageId])
@@ -50,7 +51,8 @@ for (title, pageId) in entriesToAdd:
 	try:
 		dbCur.execute("INSERT INTO offsets VALUES (?, ?, ?, ?)", (title, pageId, lastOffset, -1))
 	except sqlite3.IntegrityError as e:
-		print(f"Failed on title \"{t}\": {e}")
-# Close db
+		print(f"Failed on title \"{t}\": {e}", file=sys.stderr)
+
+print("Closing database")
 dbCon.commit()
 dbCon.close()
diff --git a/backend/data/enwiki/genImgData.py b/backend/data/enwiki/genImgData.py
index 9bd28f4..dedfe14 100755
--- a/backend/data/enwiki/genImgData.py
+++ b/backend/data/enwiki/genImgData.py
@@ -4,9 +4,15 @@ import sys, re
 import bz2, html, urllib.parse
 import sqlite3
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "For a set of page-ids, looks up their content in an enwiki dump,\n"
-usageInfo += "trying to get infobox image filenames, adding info to an sqlite db.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+For some set of page IDs, looks up their content in the wiki dump,
+and tries to parse infobox image names, storing them into a database.
+
+The program can be re-run with an updated set of page IDs, and
+will skip already-processed page IDs.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
@@ -21,58 +27,64 @@ def getInputPageIds():
 	return pageIds
 dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2"
 indexDb = "dumpIndex.db"
-imgDb = "imgData.db" # Output db
+imgDb = "imgData.db" # The database to create
 idLineRegex = re.compile(r"<id>(.*)</id>")
 imageLineRegex = re.compile(r".*\| *image *= *([^|]*)")
 bracketImageRegex = re.compile(r"\[\[(File:[^|]*).*]]")
 imageNameRegex = re.compile(r".*\.(jpg|jpeg|png|gif|tiff|tif)", flags=re.IGNORECASE)
 cssImgCropRegex = re.compile(r"{{css image crop\|image *= *(.*)", flags=re.IGNORECASE)
+# In testing, got about 360k image names
 
-# Open dbs
+print("Getting input page-ids")
+pageIds = getInputPageIds()
+print(f"Found {len(pageIds)}")
+
+print("Opening databases")
 indexDbCon = sqlite3.connect(indexDb)
 indexDbCur = indexDbCon.cursor()
 imgDbCon = sqlite3.connect(imgDb)
 imgDbCur = imgDbCon.cursor()
-# Create image-db table
-pidsDone = set()
+print("Checking tables")
 if imgDbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='page_imgs'").fetchone() == None:
+	# Create tables if not present
 	imgDbCur.execute("CREATE TABLE page_imgs (page_id INT PRIMARY KEY, img_name TEXT)") # img_name may be NULL
 	imgDbCur.execute("CREATE INDEX page_imgs_idx ON page_imgs(img_name)")
 else:
+	# Check for already-processed page IDs
+	numSkipped = 0
 	for (pid,) in imgDbCur.execute("SELECT page_id FROM page_imgs"):
-		pidsDone.add(pid)
-	print(f"Will skip {len(pidsDone)} already-processed page-ids")
-# Get input pageIds
-print("Getting input page-ids", file=sys.stderr)
-pageIds = getInputPageIds()
-for pid in pidsDone:
-	pageIds.remove(pid)
-print(f"Found {len(pageIds)} page-ids to process")
-# Get page-id dump-file offsets
-print("Getting dump-file offsets", file=sys.stderr)
+		if pid in pageIds:
+			pageIds.remove(pid)
+			numSkipped += 1
+		else:
+			print(f"WARNING: Found already-processed page ID {pid} which was not in input set")
+	print(f"Will skip {numSkipped} already-processed page IDs")
+
+print("Getting dump-file offsets")
 offsetToPageids = {}
-offsetToEnd = {}
+offsetToEnd = {} # Maps chunk-start offsets to their chunk-end offsets
 iterNum = 0
 for pageId in pageIds:
 	iterNum += 1
 	if iterNum % 1e4 == 0:
-		print(f"At iteration {iterNum}", file=sys.stderr)
+		print(f"At iteration {iterNum}")
 	#
 	query = "SELECT offset, next_offset FROM offsets WHERE id = ?"
 	row = indexDbCur.execute(query, (pageId,)).fetchone()
 	if row == None:
-		print(f"WARNING: Page id {pageId} not found", file=sys.stderr)
+		print(f"WARNING: Page ID {pageId} not found")
 		continue
 	(chunkOffset, endOffset) = row
 	offsetToEnd[chunkOffset] = endOffset
 	if chunkOffset not in offsetToPageids:
 		offsetToPageids[chunkOffset] = []
 	offsetToPageids[chunkOffset].append(pageId)
-print(f"Found {len(offsetToEnd)} chunks to check", file=sys.stderr)
-# Look through dump file, jumping to chunks containing relevant pages
-print("Reading through dump file", file=sys.stderr)
+print(f"Found {len(offsetToEnd)} chunks to check")
+
+print("Iterating through chunks in dump file")
 def getImageName(content):
-	""" Given an array of text-content lines, returns an image-filename, or None """
+	" Given an array of text-content lines, tries to return an infobox image name, or None "
+	# Doesn't try to find images in outside-infobox [[File:...]] and <imagemap> sections
 	for line in content:
 		match = imageLineRegex.match(line)
 		if match != None:
@@ -109,16 +121,15 @@ def getImageName(content):
 				imageName = html.unescape(imageName) # Intentionally unescaping again (handles some odd cases)
 				imageName = imageName.replace("_", " ")
 				return imageName
-			# Skip lines like: | image = &lt;imagemap&gt;
+			# Exclude lines like: | image = &lt;imagemap&gt;
 			return None
-	# Doesn't try and find images in outside-infobox [[File:...]] and <imagemap> sections
 	return None
 with open(dumpFile, mode='rb') as file:
 	iterNum = 0
 	for (pageOffset, endOffset) in offsetToEnd.items():
 		iterNum += 1
 		if iterNum % 100 == 0:
-			print(f"At iteration {iterNum}", file=sys.stderr)
+			print(f"At iteration {iterNum}")
 		#
 		pageIds = offsetToPageids[pageOffset]
 		# Jump to chunk
@@ -168,11 +179,12 @@ with open(dumpFile, mode='rb') as file:
 					imgDbCur.execute("INSERT into page_imgs VALUES (?, ?)", (pageId, imageName))
 					break
 				if not foundTextEnd:
-					print(f"Did not find </text> for page id {pageId}", file=sys.stderr)
+					print(f"WARNING: Did not find </text> for page id {pageId}")
 				break
 			if not foundText:
-				print(f"Did not find <text> for page id {pageId}", file=sys.stderr)
-# Close dbs
+				print(f"WARNING: Did not find <text> for page id {pageId}")
+
+print("Closing databases")
 indexDbCon.close()
 imgDbCon.commit()
 imgDbCon.close()
diff --git a/backend/data/enwiki/lookupPage.py b/backend/data/enwiki/lookupPage.py
index 76f2f95..1a90851 100755
--- a/backend/data/enwiki/lookupPage.py
+++ b/backend/data/enwiki/lookupPage.py
@@ -4,9 +4,12 @@ import sys, re
 import bz2
 import sqlite3
 
-usageInfo =  f"usage: {sys.argv[0]} title1\n"
-usageInfo += "Looks up a page with title title1 in a wikipedia dump,\n"
-usageInfo += "using a dump index db, and prints the corresponding <page>.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]} title1
+
+Looks up a page with title title1 in the wiki dump, using
+the dump-index db, and prints the corresponding <page>.
+"""
 if len(sys.argv) != 2:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
@@ -15,20 +18,19 @@ dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2"
 indexDb = "dumpIndex.db"
 pageTitle = sys.argv[1].replace("_", " ")
 
-# Searching index file
-print("Lookup offset in index db")
+print("Looking up offset in index db")
 dbCon = sqlite3.connect(indexDb)
 dbCur = dbCon.cursor()
 query = "SELECT title, offset, next_offset FROM offsets WHERE title = ?"
 row = dbCur.execute(query, (pageTitle,)).fetchone()
 if row == None:
 	print("Title not found")
-	sys.exit(1)
-(_, pageOffset, endOffset) = row
+	sys.exit(0)
+_, pageOffset, endOffset = row
 dbCon.close()
 print(f"Found chunk at offset {pageOffset}")
-# Read dump file
-print("Reading dump file")
+
+print("Reading from wiki dump")
 content = []
 with open(dumpFile, mode='rb') as file:
 	# Get uncompressed chunk
@@ -61,6 +63,6 @@ with open(dumpFile, mode='rb') as file:
 					if line.lstrip() == "</page>":
 						break
 		lineIdx += 1
-# Print content
+
 print("Content: ")
 print("\n".join(content))
diff --git a/backend/data/eol/README.md b/backend/data/eol/README.md
index fbb008d..8c527a8 100644
--- a/backend/data/eol/README.md
+++ b/backend/data/eol/README.md
@@ -11,9 +11,10 @@ This directory holds files obtained from/using the [Encyclopedia of Life](https:
     Contains metadata for images from EOL.
 -   imagesList/ <br>
     Extracted from imagesList.tgz.
+-   genImagesListDb.sh <br>
+    Creates a database, and imports imagesList/*.csv files into it.
 -   imagesList.db <br>
-    Contains data from imagesList/.
-    Created by running genImagesListDb.sh, which simply imports csv files into a database. <br>
+    Created by running genImagesListDb.sh <br>
     Tables: <br>
     -   `images`:
         `content_id INT PRIMARY KEY, page_id INT, source_url TEXT, copy_url TEXT, license TEXT, copyright_owner TEXT`
diff --git a/backend/data/eol/downloadImgs.py b/backend/data/eol/downloadImgs.py
index ac72ea1..96bc085 100755
--- a/backend/data/eol/downloadImgs.py
+++ b/backend/data/eol/downloadImgs.py
@@ -7,18 +7,24 @@ import time
 from threading import Thread
 import signal
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Downloads images from URLs specified in an image-list database,\n"
-usageInfo += "for a specified set of EOL IDs. Downloaded images get names of\n"
-usageInfo += "the form 'eolId1 contentId1.ext1'.\n"
-usageInfo += "\n"
-usageInfo += "SIGINT causes the program to finish ongoing downloads and exit.\n"
-usageInfo += "The program can be re-run to continue downloading. It looks for\n"
-usageInfo += "existing downloaded files, and continues after the one with\n"
-usageInfo += "highest EOL ID.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+For some set of EOL IDs, downloads associated images from URLs in
+an image-list database. Uses multiple downloading threads.
+
+May obtain multiple images per ID. The images will get names
+with the form 'eolId1 contentId1.ext1'.
+
+SIGINT causes the program to finish ongoing downloads and exit.
+The program can be re-run to continue downloading. It looks for
+already-downloaded files, and continues after the one with
+highest EOL ID.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
+# In testing, this downloaded about 70k images, over a few days
 
 imagesListDb = "imagesList.db"
 def getInputEolIds():
@@ -30,44 +36,29 @@ def getInputEolIds():
 	dbCon.close()
 	return eolIds
 outDir = "imgsForReview/"
-LICENSE_REGEX = r"cc-by((-nc)?(-sa)?(-[234]\.[05])?)|cc-publicdomain|cc-0-1\.0|public domain"
+MAX_IMGS_PER_ID = 3
+MAX_THREADS = 5
 POST_DL_DELAY_MIN = 2 # Minimum delay in seconds to pause after download before starting another (for each thread)
 POST_DL_DELAY_MAX = 3
+LICENSE_REGEX = r"cc-by((-nc)?(-sa)?(-[234]\.[05])?)|cc-publicdomain|cc-0-1\.0|public domain"
 
-# Get eol-ids from data db
 print("Getting input EOL IDs")
 eolIds = getInputEolIds()
-# Get eol-ids from images db
-print("Getting images-list-db EOL IDs")
+print("Getting EOL IDs to download for")
+# Get IDs from images-list db
 imgDbCon = sqlite3.connect(imagesListDb)
 imgCur = imgDbCon.cursor()
 imgListIds = set()
-for row in imgCur.execute("SELECT DISTINCT page_id FROM images"):
-	imgListIds.add(row[0])
-# Get eol-id intersection, and sort into list
+for (pageId,) in imgCur.execute("SELECT DISTINCT page_id FROM images"):
+	imgListIds.add(pageId)
+# Get set intersection, and sort into list
 eolIds = eolIds.intersection(imgListIds)
 eolIds = sorted(eolIds)
-print(f"Resulted in {len(eolIds)} EOL IDs")
+print(f"Result: {len(eolIds)} EOL IDs")
 
-MAX_IMGS_PER_ID = 3
-MAX_THREADS = 5
-numThreads = 0
-threadException = None # Used for ending main thread after a non-main thread exception
-def downloadImg(url, outFile):
-	global numThreads, threadException
-	try:
-		data = requests.get(url)
-		with open(outFile, 'wb') as file:
-			file.write(data.content)
-		time.sleep(random.random() * (POST_DL_DELAY_MAX - POST_DL_DELAY_MIN) + POST_DL_DELAY_MIN)
-	except Exception as e:
-		print(f"Error while downloading to {outFile}: {str(e)}", file=sys.stderr)
-		threadException = e
-	numThreads -= 1
-# Create output directory if not present
+print("Checking output directory")
 if not os.path.exists(outDir):
 	os.mkdir(outDir)
-# Find next eol ID to download for
 print("Finding next ID to download for")
 nextIdx = 0
 fileList = os.listdir(outDir)
@@ -78,7 +69,11 @@ if len(ids) > 0:
 if nextIdx == len(eolIds):
 	print("No IDs left. Exiting...")
 	sys.exit(0)
-# Detect SIGINT signals
+
+print("Starting download threads")
+numThreads = 0
+threadException = None # Used for ending main thread after a non-main thread exception
+# Handle SIGINT signals
 interrupted = False
 oldHandler = None
 def onSigint(sig, frame):
@@ -86,33 +81,27 @@ def onSigint(sig, frame):
 	interrupted = True
 	signal.signal(signal.SIGINT, oldHandler)
 oldHandler = signal.signal(signal.SIGINT, onSigint)
+# Function for threads to execute
+def downloadImg(url, outFile):
+	global numThreads, threadException
+	try:
+		data = requests.get(url)
+		with open(outFile, 'wb') as file:
+			file.write(data.content)
+		time.sleep(random.random() * (POST_DL_DELAY_MAX - POST_DL_DELAY_MIN) + POST_DL_DELAY_MIN)
+	except Exception as e:
+		print(f"Error while downloading to {outFile}: {str(e)}", file=sys.stderr)
+		threadException = e
+	numThreads -= 1
 # Manage downloading
 for idx in range(nextIdx, len(eolIds)):
 	eolId = eolIds[idx]
 	# Get image urls
 	imgDataList = []
 	ownerSet = set() # Used to get images from different owners, for variety
-	for row in imgCur.execute(
-		"SELECT content_id, page_id, copy_url, license, copyright_owner FROM images WHERE page_id = ?", (eolId,)):
-		license = row[3]
-		copyrightOwner = row[4]
-		if re.fullmatch(LICENSE_REGEX, license) == None:
-			continue
-		if len(copyrightOwner) > 100: # Ignore certain copyrightOwner fields that seem long and problematic
-			continue
-		if copyrightOwner not in ownerSet:
-			ownerSet.add(copyrightOwner)
-			imgDataList.append(row)
-			if len(ownerSet) == MAX_IMGS_PER_ID:
-				break
-	if len(imgDataList) == 0:
-		continue
-	# Determine output filenames
-	outFiles = []
-	urls = []
-	for row in imgDataList:
-		contentId = row[0]
-		url = row[2]
+	exitLoop = False
+	query = "SELECT content_id, copy_url, license, copyright_owner FROM images WHERE page_id = ?"
+	for (contentId, url, license, copyrightOwner) in imgCur.execute(query, (eolId,)):
 		if url.startswith("data/"):
 			url = "https://content.eol.org/" + url
 		urlParts = urllib.parse.urlparse(url)
@@ -120,28 +109,37 @@ for idx in range(nextIdx, len(eolIds)):
 		if len(extension) <= 1:
 			print(f"WARNING: No filename extension found in URL {url}", file=sys.stderr)
 			continue
-		outFiles.append(str(eolId) + " " + str(contentId) + extension)
-		urls.append(url)
-	# Start downloads
-	exitLoop = False
-	for i in range(len(outFiles)):
-		outPath = outDir + outFiles[i]
-		if not os.path.exists(outPath):
-			# Enforce thread limit
-			while numThreads == MAX_THREADS:
+		# Check image-quantity limit
+		if len(ownerSet) == MAX_IMGS_PER_ID:
+			break
+		# Check for skip conditions
+		if re.fullmatch(LICENSE_REGEX, license) == None:
+			continue
+		if len(copyrightOwner) > 100: # Avoid certain copyrightOwner fields that seem long and problematic
+			continue
+		if copyrightOwner in ownerSet:
+			continue
+		ownerSet.add(copyrightOwner)
+		# Determine output filename
+		outPath = f"{outDir}{eolId} {contentId}{extension}"
+		if os.path.exists(outPath):
+			print(f"WARNING: {outPath} already exists. Skipping download.")
+			continue
+		# Check thread limit
+		while numThreads == MAX_THREADS:
+			time.sleep(1)
+		# Wait for threads after an interrupt or thread-exception
+		if interrupted or threadException != None:
+			print("Waiting for existing threads to end")
+			while numThreads > 0:
 				time.sleep(1)
-			# Wait for threads after an interrupt or thread-exception
-			if interrupted or threadException != None:
-				print("Waiting for existing threads to end")
-				while numThreads > 0:
-					time.sleep(1)
-				exitLoop = True
-				break
-			print(f"Downloading image to {outPath}")
-			# Perform download
-			numThreads += 1
-			thread = Thread(target=downloadImg, args=(urls[i], outPath), daemon=True)
-			thread.start()
+			exitLoop = True
+			break
+		# Perform download
+		print(f"Downloading image to {outPath}")
+		numThreads += 1
+		thread = Thread(target=downloadImg, args=(url, outPath), daemon=True)
+		thread.start()
 	if exitLoop:
 		break
 # Close images-list db
diff --git a/backend/data/eol/genImagesListDb.sh b/backend/data/eol/genImagesListDb.sh
index 3a8ced7..87dd840 100755
--- a/backend/data/eol/genImagesListDb.sh
+++ b/backend/data/eol/genImagesListDb.sh
@@ -1,7 +1,9 @@
 #!/bin/bash
 set -e
 
+# Combine CSV files into one, skipping header lines
 cat imagesList/media_*_{1..58}.csv | tail -n +2 > imagesList.csv
+# Create database, and import the CSV file
 sqlite3 imagesList.db <<END
 CREATE TABLE images (
 	content_id INT PRIMARY KEY, page_id INT, source_url TEXT, copy_url TEXT, license TEXT, copyright_owner TEXT);
diff --git a/backend/data/eol/reviewImgs.py b/backend/data/eol/reviewImgs.py
index 5290f9e..ecdf7ab 100755
--- a/backend/data/eol/reviewImgs.py
+++ b/backend/data/eol/reviewImgs.py
@@ -7,11 +7,14 @@ from tkinter import ttk
 import PIL
 from PIL import ImageTk, Image, ImageOps
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Provides a GUI for reviewing images. Looks in a for-review directory for\n"
-usageInfo += "images named 'eolId1 contentId1.ext1', and, for each EOL ID, enables the user to\n"
-usageInfo += "choose an image to keep, or reject all. Also provides image rotation.\n"
-usageInfo += "Chosen images are placed in another directory, and rejected ones are deleted.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Provides a GUI for reviewing images. Looks in a for-review directory for
+images named 'eolId1 contentId1.ext1', and, for each EOL ID, enables the user to
+choose an image to keep, or reject all. Also provides image rotation.
+Chosen images are placed in another directory, and rejected ones are deleted.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
@@ -21,6 +24,7 @@ outDir = "imgs/"
 extraInfoDbCon = sqlite3.connect("../data.db")
 extraInfoDbCur = extraInfoDbCon.cursor()
 def getExtraInfo(eolId):
+	global extraInfoDbCur
 	query = "SELECT names.alt_name FROM" \
 		" names INNER JOIN eol_ids ON eol_ids.name = names.name" \
 		" WHERE id = ? and pref_alt = 1"
@@ -31,21 +35,21 @@ def getExtraInfo(eolId):
 		return f"Reviewing EOL ID {eolId}"
 IMG_DISPLAY_SZ = 400
 MAX_IMGS_PER_ID = 3
-PLACEHOLDER_IMG = Image.new("RGB", (IMG_DISPLAY_SZ, IMG_DISPLAY_SZ), (88, 28, 135))
+IMG_BG_COLOR = (88, 28, 135)
+PLACEHOLDER_IMG = Image.new("RGB", (IMG_DISPLAY_SZ, IMG_DISPLAY_SZ), IMG_BG_COLOR)
 
-# Create output directory if not present
+print("Checking output directory")
 if not os.path.exists(outDir):
 	os.mkdir(outDir)
-# Get images for review
-print("Reading input image list")
+print("Getting input image list")
 imgList = os.listdir(imgDir)
 imgList.sort(key=lambda s: int(s.split(" ")[0]))
 if len(imgList) == 0:
-	print("No input images found", file=sys.stderr)
-	sys.exit(1)
+	print("No input images found")
+	sys.exit(0)
 
 class EolImgReviewer:
-	""" Provides the GUI for reviewing images """
+	" Provides the GUI for reviewing images "
 	def __init__(self, root, imgList):
 		self.root = root
 		root.title("EOL Image Reviewer")
@@ -68,7 +72,7 @@ class EolImgReviewer:
 		# Add padding
 		for child in mainFrame.winfo_children():
 			child.grid_configure(padx=5, pady=5)
-		# Add bindings
+		# Add keyboard bindings
 		root.bind("<q>", self.quit)
 		root.bind("<Key-j>", lambda evt: self.accept(0))
 		root.bind("<Key-k>", lambda evt: self.accept(1))
@@ -87,11 +91,11 @@ class EolImgReviewer:
 		self.nextImgNames = []
 		self.rotations = []
 		self.getNextImgs()
-		# For more info
+		# For displaying extra info
 		self.numReviewed = 0
 		self.startTime = time.time()
 	def getNextImgs(self):
-		""" Updates display with new images to review, or ends program """
+		" Updates display with new images to review, or ends program "
 		# Gather names of next images to review
 		for i in range(MAX_IMGS_PER_ID):
 			if self.imgListIdx == len(self.imgList):
@@ -123,7 +127,7 @@ class EolImgReviewer:
 					del self.nextImgNames[idx]
 					del self.rotations[idx]
 					continue
-				self.imgs[idx] = self.resizeForDisplay(img)
+				self.imgs[idx] = self.resizeImgForDisplay(img)
 			else:
 				self.imgs[idx] = PLACEHOLDER_IMG
 			self.photoImgs[idx] = ImageTk.PhotoImage(self.imgs[idx])
@@ -140,7 +144,7 @@ class EolImgReviewer:
 		title += f" (imgs {firstImgIdx} to {lastImgIdx} out of {len(self.imgList)})"
 		self.root.title(title)
 	def accept(self, imgIdx):
-		""" React to a user selecting an image """
+		" React to a user selecting an image "
 		if imgIdx >= len(self.nextImgNames):
 			print("Invalid selection")
 			return
@@ -159,19 +163,20 @@ class EolImgReviewer:
 		self.numReviewed += 1
 		self.getNextImgs()
 	def reject(self):
-		""" React to a user rejecting all images of a set """
+		" React to a user rejecting all images of a set "
 		for i in range(len(self.nextImgNames)):
 			os.remove(imgDir + self.nextImgNames[i])
 		self.numReviewed += 1
 		self.getNextImgs()
 	def rotate(self, imgIdx, anticlockwise = False):
-		""" Respond to a user rotating an image """
+		" Respond to a user rotating an image "
 		deg = -90 if not anticlockwise else 90
 		self.imgs[imgIdx] = self.imgs[imgIdx].rotate(deg)
 		self.photoImgs[imgIdx] = ImageTk.PhotoImage(self.imgs[imgIdx])
 		self.labels[imgIdx].config(image=self.photoImgs[imgIdx])
 		self.rotations[imgIdx] = (self.rotations[imgIdx] + deg) % 360
 	def quit(self, e = None):
+		global extraInfoDbCon
 		print(f"Number reviewed: {self.numReviewed}")
 		timeElapsed = time.time() - self.startTime
 		print(f"Time elapsed: {timeElapsed:.2f} seconds")
@@ -179,8 +184,8 @@ class EolImgReviewer:
 			print(f"Avg time per review: {timeElapsed/self.numReviewed:.2f} seconds")
 		extraInfoDbCon.close()
 		self.root.destroy()
-	def resizeForDisplay(self, img):
-		""" Returns a copy of an image, shrunk to fit the display (keeps aspect ratio), and with a background """
+	def resizeImgForDisplay(self, img):
+		" Returns a copy of an image, shrunk to fit in its frame (keeps aspect ratio), and with a background "
 		if max(img.width, img.height) > IMG_DISPLAY_SZ:
 			if (img.width > img.height):
 				newHeight = int(img.height * IMG_DISPLAY_SZ/img.width)
@@ -194,6 +199,7 @@ class EolImgReviewer:
 			int((IMG_DISPLAY_SZ - img.height) / 2)))
 		return bgImg
 # Create GUI and defer control
+print("Starting GUI")
 root = tki.Tk()
 EolImgReviewer(root, imgList)
 root.mainloop()
diff --git a/backend/data/genDbpData.py b/backend/data/genDbpData.py
index afe1e17..df3a6be 100755
--- a/backend/data/genDbpData.py
+++ b/backend/data/genDbpData.py
@@ -3,11 +3,12 @@
 import sys, os, re
 import sqlite3
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Reads DBpedia data from dbpedia/*, along with tree-of-life\n"
-usageInfo += "node and name data from a sqlite database, associates nodes with\n"
-usageInfo += "DBpedia IRIs, and adds alt-name and description information for\n"
-usageInfo += "those nodes.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads a database containing data from DBpedia, and tries to associate
+DBpedia IRIs with nodes in a database, adding short-descriptions for them.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
@@ -16,18 +17,21 @@ dbpediaDb = "dbpedia/descData.db"
 namesToSkipFile = "pickedEnwikiNamesToSkip.txt"
 pickedLabelsFile = "pickedDbpLabels.txt"
 dbFile = "data.db"
+rootNodeName = "cellular organisms"
+rootLabel = "organism" # Will be associated with root node
+# Got about 400k descriptions when testing
 
-# Open dbs
+print("Opening databases")
 dbpCon = sqlite3.connect(dbpediaDb)
 dbpCur = dbpCon.cursor()
 dbCon = sqlite3.connect(dbFile)
 dbCur = dbCon.cursor()
-# Get node names
-print("Reading node names")
+
+print("Getting node names")
 nodeNames = set()
 for (name,) in dbCur.execute("SELECT name from nodes"):
 	nodeNames.add(name)
-# Skipping certain names
+
 print("Checking for names to skip")
 oldSz = len(nodeNames)
 if os.path.exists(namesToSkipFile):
@@ -35,22 +39,22 @@ if os.path.exists(namesToSkipFile):
 		for line in file:
 			nodeNames.remove(line.rstrip())
 print(f"Skipping {oldSz - len(nodeNames)} nodes")
-# Get disambiguation page labels
+
 print("Reading disambiguation-page labels")
 disambigLabels = set()
 query = "SELECT labels.iri from labels INNER JOIN disambiguations ON labels.iri = disambiguations.iri"
 for (label,) in dbpCur.execute(query):
 	disambigLabels.add(label)
-# Try associating nodes with IRIs, accounting for disambiguation labels
-print("Trying to associate nodes with labels")
+
+print("Trying to associate nodes with DBpedia labels")
 nodeToLabel = {}
-nameVariantRegex = re.compile(r"(.*) \(([^)]+)\)")
-nameToVariants = {}
+nameVariantRegex = re.compile(r"(.*) \(([^)]+)\)") # Used to recognise labels like 'Thor (shrimp)'
+nameToVariants = {} # Maps node names to lists of matching labels
 iterNum = 0
 for (label,) in dbpCur.execute("SELECT label from labels"):
 	iterNum += 1
 	if iterNum % 1e5 == 0:
-		print(f"Processing line {iterNum}")
+		print(f"At iteration {iterNum}")
 	#
 	if label in disambigLabels:
 		continue
@@ -69,18 +73,20 @@ for (label,) in dbpCur.execute("SELECT label from labels"):
 					nameToVariants[subName] = [label]
 				elif name not in nameToVariants[subName]:
 					nameToVariants[subName].append(label)
+# Associate labels without conflicts
 for (name, variants) in nameToVariants.items():
 	if len(variants) == 1:
 		nodeToLabel[name] = variants[0]
 for name in nodeToLabel:
 	del nameToVariants[name]
-nodeToLabel["cellular organisms"] = "organism" # Special case for root node
-print(f"Number of conflicts: {len(nameToVariants)}")
-# Try resolving conflicts
+# Special case for root node
+nodeToLabel[rootNodeName] = rootLabel
+if rootNodeName in nameToVariants:
+	del nameToVariants["cellular organisms"]
+
+print("Trying to resolve {len(nameToVariants)} conflicts")
 def resolveWithPickedLabels():
-	# Attempts conflict resolution using a file with lines of the form 'name1|label1',
-		# where label1 may be absent, indicating that no label should be associated with the name
-	print("Resolving conflicts using picked-labels")
+	" Attempts to resolve conflicts using a picked-labels file, with lines of the form 'name1|label1' (label1 may be absent) "
 	with open(pickedLabelsFile) as file:
 		for line in file:
 			(name, _, label) = line.rstrip().partition("|")
@@ -94,11 +100,13 @@ def resolveWithPickedLabels():
 					print(f"INFO: Picked label \"{label}\" for name \"{name}\" outside choice set", file=sys.stderr)
 				nodeToLabel[name] = label
 				del nameToVariants[name]
-	print(f"Remaining number of conflicts: {len(nameToVariants)}")
 def resolveWithCategoryList():
-	# Attempts conflict resolution using category-text in labels of the form 'name1 (category1)'
-	# Does a generic-category pass first (avoid stuff like Pan being classified as a horse instead of an ape)
-	print("Resolving conflicts using category-list")
+	"""
+	Attempts to resolve conflicts by looking for labels like 'name1 (category1)',
+	and choosing those with a category1 that seems 'biological'.
+	Does two passes, using more generic categories first. This helps avoid stuff like
+	Pan being classified as a horse instead of an ape.
+	"""
 	generalCategories = {
 		"species", "genus",
 		"plant", "fungus", "animal",
@@ -107,7 +115,7 @@ def resolveWithCategoryList():
 	}
 	specificCategories = {
 		"protist", "alveolate", "dinoflagellates",
-		"orchid", "Poaceae", "fern", "moss", "alga",
+		"orchid", "poaceae", "fern", "moss", "alga",
 		"bryozoan", "hydrozoan",
 		"sponge", "cnidarian", "coral", "polychaete", "echinoderm",
 		"bivalve", "gastropod", "chiton",
@@ -139,10 +147,8 @@ def resolveWithCategoryList():
 					break
 	for name in namesToRemove:
 		del nameToVariants[name]
-	print(f"Remaining number of conflicts: {len(nameToVariants)}")
 def resolveWithTypeData():
-	# Attempts conflict-resolution using dbpedia's instance-type data
-	print("Resolving conflicts using instance-type data")
+	" Attempts to resolve conflicts using DBpedia's type data "
 	taxonTypes = { # Obtained from the DBpedia ontology
 		"http://dbpedia.org/ontology/Species",
 		"http://dbpedia.org/ontology/Archaea",
@@ -179,7 +185,7 @@ def resolveWithTypeData():
 	for (label, type) in dbpCur.execute("SELECT label, type from labels INNER JOIN types on labels.iri = types.iri"):
 		iterNum += 1
 		if iterNum % 1e5 == 0:
-			print(f"Processing line {iterNum}")
+			print(f"At iteration {iterNum}")
 		#
 		if type in taxonTypes:
 			name = label.lower()
@@ -193,20 +199,17 @@ def resolveWithTypeData():
 					if name in nameToVariants:
 						nodeToLabel[name] = label
 						del nameToVariants[name]
-	print(f"Remaining number of conflicts: {len(nameToVariants)}")
+#resolveWithTypeData()
+#resolveWithCategoryList()
 resolveWithPickedLabels()
-# Associate nodes with IRIs
+print(f"Remaining number of conflicts: {len(nameToVariants)}")
+
 print("Getting node IRIs")
 nodeToIri = {}
-iterNum = 0
 for (name, label) in nodeToLabel.items():
-	row = dbpCur.execute("SELECT iri FROM labels where label = ? COLLATE NOCASE", (label,)).fetchone()
-	if row == None:
-		print(f"ERROR: Couldn't find label {label}", file=sys.stderr)
-		sys.exit(1)
-	else:
-		nodeToIri[name] = row[0]
-# Resolve redirects
+	(iri,) = dbpCur.execute("SELECT iri FROM labels where label = ? COLLATE NOCASE", (label,)).fetchone()
+	nodeToIri[name] = iri
+
 print("Resolving redirects")
 redirectingIriSet = set()
 iterNum = 0
@@ -219,9 +222,10 @@ for (name, iri) in nodeToIri.items():
 	if row != None:
 		nodeToIri[name] = row[0]
 		redirectingIriSet.add(name)
-# Find descriptions, and add to db
-print("Adding node description data")
+
+print("Adding description tables")
 dbCur.execute("CREATE TABLE wiki_ids (name TEXT PRIMARY KEY, id INT, redirected INT)")
+dbCur.execute("CREATE INDEX wiki_id_idx ON wiki_ids(id)")
 dbCur.execute("CREATE TABLE descs (wiki_id INT PRIMARY KEY, desc TEXT, from_dbp INT)")
 iterNum = 0
 for (name, iri) in nodeToIri.items():
@@ -232,10 +236,11 @@ for (name, iri) in nodeToIri.items():
 	query = "SELECT abstract, id FROM abstracts INNER JOIN ids ON abstracts.iri = ids.iri WHERE ids.iri = ?"
 	row = dbpCur.execute(query, (iri,)).fetchone()
 	if row != None:
-		(desc, wikiId) = row
+		desc, wikiId = row
 		dbCur.execute("INSERT INTO wiki_ids VALUES (?, ?, ?)", (name, wikiId, 1 if name in redirectingIriSet else 0))
 		dbCur.execute("INSERT OR IGNORE INTO descs VALUES (?, ?, ?)", (wikiId, desc, 1))
-# Close dbs
+
+print("Closing databases")
 dbCon.commit()
 dbCon.close()
 dbpCon.commit()
diff --git a/backend/data/genEnwikiDescData.py b/backend/data/genEnwikiDescData.py
index dbc8d6b..d3f93ed 100755
--- a/backend/data/genEnwikiDescData.py
+++ b/backend/data/genEnwikiDescData.py
@@ -3,10 +3,13 @@
 import sys, re, os
 import sqlite3
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Reads Wikimedia enwiki data from enwiki/, and node and name data"
-usageInfo += "from a sqlite database, and adds description data for names that\n"
-usageInfo += "don't have them.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads a database containing data from Wikipedia, and tries to associate
+wiki pages with nodes in the database, and add descriptions for nodes
+that don't have them.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
@@ -15,36 +18,39 @@ enwikiDb = "enwiki/descData.db"
 dbFile = "data.db"
 namesToSkipFile = "pickedEnwikiNamesToSkip.txt"
 pickedLabelsFile = "pickedEnwikiLabels.txt"
+# Got about 25k descriptions when testing
 
-# Open dbs
+print("Opening databases")
 enwikiCon = sqlite3.connect(enwikiDb)
 enwikiCur = enwikiCon.cursor()
 dbCon = sqlite3.connect(dbFile)
 dbCur = dbCon.cursor()
-# Read name/title files
+
+print("Checking for names to skip")
 namesToSkip = set()
-nameToPickedTitle = {} # Maps names to titles to be used for them
 if os.path.exists(namesToSkipFile):
 	with open(namesToSkipFile) as file:
 		for line in file:
 			namesToSkip.add(line.rstrip())
-	print(f"Read in {len(namesToSkip)} names to skip")
+	print(f"Found {len(namesToSkip)}")
+print("Checking for picked-titles")
+nameToPickedTitle = {}
 if os.path.exists(pickedLabelsFile):
 	with open(pickedLabelsFile) as file:
 		for line in file:
 			(name, _, title) = line.rstrip().partition("|")
 			nameToPickedTitle[name.lower()] = title
-print(f"Read in {len(nameToPickedTitle)} titles to use for certain names")
-# Get node names without descriptions
-print("Getting node names")
+print(f"Found {len(nameToPickedTitle)}")
+
+print("Getting names of nodes without descriptions")
 nodeNames = set()
 query = "SELECT nodes.name FROM nodes LEFT JOIN wiki_ids ON nodes.name = wiki_ids.name WHERE wiki_ids.id IS NULL"
-for row in dbCur.execute(query):
-	nodeNames.add(row[0])
-print(f"Found {len(nodeNames)} names")
+for (name,) in dbCur.execute(query):
+	nodeNames.add(name)
+print(f"Found {len(nodeNames)}")
 nodeNames.difference_update(namesToSkip)
-# Find page id for each node name
-print("Getting node page-ids")
+
+print("Associating nodes with page IDs")
 nodeToPageId = {}
 iterNum = 0
 for name in nodeNames:
@@ -63,34 +69,34 @@ for name in nodeNames:
 			nodeToPageId[name] = row[0]
 		else:
 			print("WARNING: Picked title {title} not found", file=sys.stderr)
-# Resolve redirects
+
 print("Resolving redirects")
 redirectingNames = set()
 iterNum = 0
 for (name, pageId) in nodeToPageId.items():
 	iterNum += 1
-	if iterNum % 1000 == 0:
+	if iterNum % 1e3 == 0:
 		print(f"At iteration {iterNum}")
 	#
-	row = enwikiCur.execute(
-		"SELECT pages.id FROM redirects INNER JOIN pages ON redirects.target = pages.title WHERE redirects.id = ?",
-		(pageId,)).fetchone()
+	query = "SELECT pages.id FROM redirects INNER JOIN pages ON redirects.target = pages.title WHERE redirects.id = ?"
+	row = enwikiCur.execute(query, (pageId,)).fetchone()
 	if row != None:
 		nodeToPageId[name] = row[0]
 		redirectingNames.add(name)
-# Add descriptions for each node
+
 print("Adding description data")
 iterNum = 0
 for (name, pageId) in nodeToPageId.items():
 	iterNum += 1
-	if iterNum % 1000 == 0:
+	if iterNum % 1e3 == 0:
 		print(f"At iteration {iterNum}")
 	#
 	row = enwikiCur.execute("SELECT desc FROM descs where descs.id = ?", (pageId,)).fetchone()
 	if row != None:
 		dbCur.execute("INSERT INTO wiki_ids VALUES (?, ?, ?)", (name, pageId, 1 if name in redirectingNames else 0))
 		dbCur.execute("INSERT OR IGNORE INTO descs VALUES (?, ?, ?)", (pageId, row[0], 0))
-# Close dbs
+
+print("Closing databases")
 dbCon.commit()
 dbCon.close()
 enwikiCon.close()
diff --git a/backend/data/genEnwikiNameData.py b/backend/data/genEnwikiNameData.py
index 8285a40..7ad61d1 100755
--- a/backend/data/genEnwikiNameData.py
+++ b/backend/data/genEnwikiNameData.py
@@ -3,9 +3,13 @@
 import sys, re
 import sqlite3
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Reads Wikimedia enwiki redirect data from enwiki/, and node and wiki-id\n"
-usageInfo += "data from a sqlite database, and adds supplmenentary alt-name data.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads from a database containing data from Wikipedia, along with
+node and wiki-id information from the database, and uses Wikipedia
+page-redirect information to add additional alt-name data.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
@@ -15,19 +19,19 @@ dbFile = "data.db"
 altNameRegex = re.compile(r"[a-zA-Z]+")
 	# Avoids names like 'Evolution of Elephants', 'Banana fiber', 'Fish (zoology)',
 
-# Open dbs
+print("Opening databases")
 enwikiCon = sqlite3.connect(enwikiDb)
 enwikiCur = enwikiCon.cursor()
 dbCon = sqlite3.connect(dbFile)
 dbCur = dbCon.cursor()
-# Get nodes with wiki-ids
+
 print("Getting nodes with wiki IDs")
 nodeToWikiId = {}
-for row in dbCur.execute("SELECT name, id from wiki_ids"):
-	nodeToWikiId[row[0]] = row[1]
-print(f"Found {len(nodeToWikiId)} nodes")
-# Find wiki-ids that redirect to each node
-print("Finding redirecter names")
+for (nodeName, wikiId) in dbCur.execute("SELECT name, id from wiki_ids"):
+	nodeToWikiId[nodeName] = wikiId
+print(f"Found {len(nodeToWikiId)}")
+
+print("Iterating through nodes, finding names that redirect to them")
 nodeToAltNames = {}
 numAltNames = 0
 iterNum = 0
@@ -45,8 +49,8 @@ for (nodeName, wikiId) in nodeToWikiId.items():
 			nodeToAltNames[nodeName].add(name.lower())
 			numAltNames += 1
 print(f"Found {numAltNames} alt-names")
-# Remove existing alt-names
-print("Removing existing alt-names")
+
+print("Excluding existing alt-names from the set")
 query = "SELECT alt_name FROM names WHERE alt_name IN ({})"
 iterNum = 0
 for (nodeName, altNames) in nodeToAltNames.items():
@@ -60,12 +64,13 @@ for (nodeName, altNames) in nodeToAltNames.items():
 	numAltNames -= len(existingNames)
 	altNames.difference_update(existingNames)
 print(f"Left with {numAltNames} alt-names")
-# Add alt-names
-print("Adding alt-names")
+
+print("Adding alt-names to database")
 for (nodeName, altNames) in nodeToAltNames.items():
 	for altName in altNames:
 		dbCur.execute("INSERT INTO names VALUES (?, ?, ?, 'enwiki')", (nodeName, altName, 0))
-# Close dbs
+
+print("Closing databases")
 dbCon.commit()
 dbCon.close()
 enwikiCon.close()
diff --git a/backend/data/genEolNameData.py b/backend/data/genEolNameData.py
index d852751..dd33ee0 100755
--- a/backend/data/genEolNameData.py
+++ b/backend/data/genEolNameData.py
@@ -3,34 +3,39 @@
 import sys, re, os
 import html, csv, sqlite3
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Reads vernacular-names CSV data (from the Encyclopedia of Life site),\n"
-usageInfo += "makes associations with node data in a sqlite database, and writes\n"
-usageInfo += "name data to that database.\n"
-usageInfo += "\n"
-usageInfo += "Expects a CSV header describing lines with format:\n"
-usageInfo += "    page_id, canonical_form, vernacular_string, language_code,\n"
-usageInfo += "    resource_name, is_preferred_by_resource, is_preferred_by_eol\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads files describing name data from the 'Encyclopedia of Life' site,
+tries to associate names with nodes in the database, and adds tables
+to represent associated names.
+
+Reads a vernacularNames.csv file:
+	Starts with a header line containing:
+		page_id, canonical_form, vernacular_string, language_code,
+		resource_name, is_preferred_by_resource, is_preferred_by_eol
+	The canonical_form and vernacular_string fields contain names
+		associated with the page ID. Names are not always unique to
+		particular page IDs.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
 
-vnamesFile = "eol/vernacularNames.csv"
+vnamesFile = "eol/vernacularNames.csv" # Had about 2.8e6 entries
 dbFile = "data.db"
-NAMES_TO_SKIP = {"unknown", "unknown species", "unidentified species"}
+namesToSkip = {"unknown", "unknown species", "unidentified species"}
 pickedIdsFile = "pickedEolIds.txt"
-badAltsFile = "pickedEolAltsToSkip.txt"
+altsToSkipFile = "pickedEolAltsToSkip.txt"
 
-# Read in vernacular-names data
-	# Note: Canonical-names may have multiple pids
-	# Note: A canonical-name's associated pids might all have other associated names
 print("Reading in vernacular-names data")
-nameToPids = {}
+nameToPids = {} # 'pid' means 'Page ID'
 canonicalNameToPids = {}
 pidToNames = {}
-pidToPreferred = {}
+pidToPreferred = {} # Maps pids to 'preferred' names
 def updateMaps(name, pid, canonical, preferredAlt):
-	if name in NAMES_TO_SKIP:
+	global namesToSkip, nameToPids, canonicalNameToPids, pidToNames, pidToPreferred
+	if name in namesToSkip:
 		return
 	if name not in nameToPids:
 		nameToPids[name] = {pid}
@@ -52,6 +57,9 @@ with open(vnamesFile, newline="") as csvfile:
 	lineNum = 0
 	for row in reader:
 		lineNum += 1
+		if lineNum % 1e5 == 0:
+			print(f"At line {lineNum}")
+		# Skip header line
 		if lineNum == 1:
 			continue
 		# Parse line
@@ -64,7 +72,7 @@ with open(vnamesFile, newline="") as csvfile:
 		updateMaps(name1, pid, True, False)
 		if lang == "eng" and name2 != "":
 			updateMaps(name2, pid, False, preferred)
-# Check for manually-picked pids
+
 print("Checking for manually-picked pids")
 nameToPickedPid = {}
 if os.path.exists(pickedIdsFile):
@@ -73,64 +81,77 @@ if os.path.exists(pickedIdsFile):
 			(name, _, eolId) = line.rstrip().partition("|")
 			nameToPickedPid[name] = None if eolId == "" else int(eolId)
 print(f"Found {len(nameToPickedPid)}")
-# Read in node-alt_names to avoid
-print("Checking for bad-alt-names")
-nameToBadAlts = {}
-if os.path.exists(badAltsFile):
-	with open(badAltsFile) as file:
+
+print("Checking for alt-names to skip")
+nameToAltsToSkip = {}
+numToSkip = 0
+if os.path.exists(altsToSkipFile):
+	with open(altsToSkipFile) as file:
 		for line in file:
 			(name, _, altName) = line.rstrip().partition("|")
-			if name not in nameToBadAlts:
-				nameToBadAlts[name] = [altName]
+			if name not in nameToAltsToSkip:
+				nameToAltsToSkip[name] = [altName]
 			else:
-				nameToBadAlts[name].append(altName)
-print(f"Found bad-alts for {len(nameToBadAlts)} nodes")
-# Open db connection
+				nameToAltsToSkip[name].append(altName)
+			numToSkip += 1
+print(f"Found {numToSkip} alt-names to skip")
+
+print("Creating database tables")
 dbCon = sqlite3.connect(dbFile)
 dbCur = dbCon.cursor()
-# Create tables
 dbCur.execute("CREATE TABLE names(name TEXT, alt_name TEXT, pref_alt INT, src TEXT, PRIMARY KEY(name, alt_name))")
 dbCur.execute("CREATE INDEX names_idx ON names(name)")
 dbCur.execute("CREATE INDEX names_alt_idx ON names(alt_name)")
 dbCur.execute("CREATE INDEX names_alt_idx_nc ON names(alt_name COLLATE NOCASE)")
 dbCur.execute("CREATE TABLE eol_ids(id INT PRIMARY KEY, name TEXT)")
 dbCur.execute("CREATE INDEX eol_name_idx ON eol_ids(name)")
-# Iterate through 'nodes' table, resolving to canonical-names
+
+print("Associating nodes with names")
 usedPids = set()
 unresolvedNodeNames = set()
 dbCur2 = dbCon.cursor()
 def addToDb(nodeName, pidToUse):
-	altNames = set()
-	preferredName = pidToPreferred[pidToUse] if (pidToUse in pidToPreferred) else None
+	" Adds page-ID-associated name data to a node in the database "
+	global dbCur, pidToPreferred
 	dbCur.execute("INSERT INTO eol_ids VALUES (?, ?)", (pidToUse, nodeName))
+	# Get alt-names
+	altNames = set()
 	for n in pidToNames[pidToUse]:
+		# Avoid alt-names with >3 words
 		if len(n.split(" ")) > 3:
 			continue
+		# Avoid alt-names that already name a node in the database
 		if dbCur.execute("SELECT name FROM nodes WHERE name = ?", (n,)).fetchone() != None:
 			continue
-		if nodeName in nameToBadAlts and n in nameToBadAlts[nodeName]:
-			print(f"Excluding bad-alt {n} for node {nodeName}")
+		# Check for picked alt-name-to-skip
+		if nodeName in nameToAltsToSkip and n in nameToAltsToSkip[nodeName]:
+			print(f"Excluding alt-name {n} for node {nodeName}")
 			continue
+		#
 		altNames.add(n)
+	# Add alt-names to db
+	preferredName = pidToPreferred[pidToUse] if (pidToUse in pidToPreferred) else None
 	for n in altNames:
 		isPreferred = 1 if (n == preferredName) else 0
 		dbCur.execute("INSERT INTO names VALUES (?, ?, ?, 'eol')", (nodeName, n, isPreferred))
-for name in nameToPickedPid: # Add manually-picked pids
-	pickedPid = nameToPickedPid[name]
-	usedPids.add(pickedPid)
-	if pickedPid != None:
-		addToDb(name, pickedPid)
-iterationNum = 0
-for (name,) in dbCur2.execute("SELECT name FROM nodes"):
-	iterationNum += 1
-	if iterationNum % 10000 == 0:
-		print(f"Loop 1 iteration {iterationNum}")
-	if name in nameToPickedPid:
+print("Adding picked IDs")
+for (name, pid) in nameToPickedPid.items():
+	if pid != None:
+		addToDb(name, pid)
+		usedPids.add(pid)
+print("Associating nodes with canonical names")
+iterNum = 0
+for (nodeName,) in dbCur2.execute("SELECT name FROM nodes"):
+	iterNum += 1
+	if iterNum % 1e5 == 0:
+		print(f"At iteration {iterNum}")
+	if nodeName in nameToPickedPid:
 		continue
-	# If name matches a canonical-name, add alt-name entries to 'names' table
-	if name in canonicalNameToPids:
+	# Check for matching canonical name
+	if nodeName in canonicalNameToPids:
 		pidToUse = None
-		for pid in canonicalNameToPids[name]:
+		# Pick an associated page ID
+		for pid in canonicalNameToPids[nodeName]:
 			hasLowerPrio = pid not in pidToPreferred and pidToUse in pidToPreferred
 			hasHigherPrio = pid in pidToPreferred and pidToUse not in pidToPreferred
 			if hasLowerPrio:
@@ -138,24 +159,26 @@ for (name,) in dbCur2.execute("SELECT name FROM nodes"):
 			if pid not in usedPids and (pidToUse == None or pid < pidToUse or hasHigherPrio):
 				pidToUse = pid
 		if pidToUse != None:
+			addToDb(nodeName, pidToUse)
 			usedPids.add(pidToUse)
-			addToDb(name, pidToUse)
-	elif name in nameToPids:
-		unresolvedNodeNames.add(name)
-# Iterate through unresolved nodes, resolving to vernacular-names
-iterationNum = 0
-for name in unresolvedNodeNames:
-	iterationNum += 1
-	if iterationNum % 100 == 0:
-		print(f"Loop 2 iteration {iterationNum}")
-	# Add alt-name entries to 'names' table for first corresponding pid
+	elif nodeName in nameToPids:
+		unresolvedNodeNames.add(nodeName)
+print("Associating leftover nodes with other names")
+iterNum = 0
+for nodeName in unresolvedNodeNames:
+	iterNum += 1
+	if iterNum % 100 == 0:
+		print(f"At iteration {iterNum}")
+	# Check for matching name
 	pidToUse = None
-	for pid in nameToPids[name]:
+	for pid in nameToPids[nodeName]:
+		# Pick an associated page ID
 		if pid not in usedPids and (pidToUse == None or pid < pidToUse):
 			pidToUse = pid
 	if pidToUse != None:
+		addToDb(nodeName, pidToUse)
 		usedPids.add(pidToUse)
-		addToDb(name, pidToUse)
-# Close db
+
+print("Closing database")
 dbCon.commit()
 dbCon.close()
diff --git a/backend/data/genImgs.py b/backend/data/genImgs.py
index 097959f..ecca8e0 100755
--- a/backend/data/genImgs.py
+++ b/backend/data/genImgs.py
@@ -4,13 +4,18 @@ import sys, os, subprocess
 import sqlite3, urllib.parse
 import signal
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Reads a list of eol/enwiki images from a file, and generates web-usable versions.\n"
-usageInfo += "Uses smartcrop, and places resulting images in a directory, with name 'otolId1.jpg'.\n"
-usageInfo += "Also adds image metadata to an sqlite database.\n"
-usageInfo += "\n"
-usageInfo += "SIGINT can be used to stop conversion, and the program can be re-run to\n"
-usageInfo += "continue processing. It uses existing output files to decide where to continue from.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads node IDs and image paths from a file, and possibly from a directory,
+and generates cropped/resized versions of those images into a directory,
+with names of the form 'nodeId1.jpg'. Also adds image metadata to the
+database.
+
+SIGINT can be used to stop, and the program can be re-run to continue
+processing. It uses already-existing database entries to decide what
+to skip.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
@@ -23,19 +28,19 @@ pickedImgsDir = "pickedImgs/"
 pickedImgsFilename = "imgData.txt"
 dbFile = "data.db"
 IMG_OUT_SZ = 200
-genImgFiles = True
+genImgFiles = True # Usable for debugging
 
-# Create output directory if not present
 if not os.path.exists(outDir):
 	os.mkdir(outDir)
-# Open dbs
+
+print("Opening databases")
 dbCon = sqlite3.connect(dbFile)
 dbCur = dbCon.cursor()
 eolCon = sqlite3.connect(eolImgDb)
 eolCur = eolCon.cursor()
 enwikiCon = sqlite3.connect(enwikiImgDb)
 enwikiCur = enwikiCon.cursor()
-# Get 'picked images' info
+print("Checking for picked-images")
 nodeToPickedImg = {}
 if os.path.exists(pickedImgsDir + pickedImgsFilename):
 	lineNum = 0
@@ -49,29 +54,34 @@ if os.path.exists(pickedImgsDir + pickedImgsFilename):
 				"nodeName": nodeName, "id": lineNum,
 				"filename": filename, "url": url, "license": license, "artist": artist, "credit": credit,
 			}
-# Create image tables if not present
+
+print("Checking for image tables")
 nodesDone = set()
 imgsDone = set()
 if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='node_imgs'").fetchone() == None:
+	# Add image tables if not present
 	dbCur.execute("CREATE TABLE node_imgs (name TEXT PRIMARY KEY, img_id INT, src TEXT)")
 	dbCur.execute("CREATE TABLE images" \
 		" (id INT, src TEXT, url TEXT, license TEXT, artist TEXT, credit TEXT, PRIMARY KEY (id, src))")
 else:
-	# Get existing node-associations
+	# Get existing image-associated nodes
 	for (otolId,) in dbCur.execute("SELECT nodes.id FROM node_imgs INNER JOIN nodes ON node_imgs.name = nodes.name"):
 		nodesDone.add(otolId)
-	# And images
+	# Get existing node-associated images
 	for (imgId, imgSrc) in dbCur.execute("SELECT id, src from images"):
 		imgsDone.add((imgId, imgSrc))
-	print(f"Found {len(nodesDone)} nodes and {len(imgsDone)} images pre-existing")
-# Detect SIGINT signals
+	print(f"Found {len(nodesDone)} nodes and {len(imgsDone)} images to skip")
+
+# Set SIGINT handler
 interrupted = False
 def onSigint(sig, frame):
 	global interrupted
 	interrupted = True
 signal.signal(signal.SIGINT, onSigint)
-# Iterate though images to process
+
+print("Iterating through input images")
 def quit():
+	print("Closing databases")
 	dbCon.commit()
 	dbCon.close()
 	eolCon.close()
@@ -94,7 +104,7 @@ def convertImage(imgPath, outPath):
 		print(f"ERROR: smartcrop had exit status {completedProcess.returncode}")
 		return False
 	return True
-print("Processing picked images")
+print("Processing picked-images")
 for (otolId, imgData) in nodeToPickedImg.items():
 	# Check for SIGINT event
 	if interrupted:
@@ -105,7 +115,8 @@ for (otolId, imgData) in nodeToPickedImg.items():
 		continue
 	# Convert image
 	if genImgFiles:
-		if not convertImage(pickedImgsDir + imgData["filename"], outDir + otolId + ".jpg"):
+		success = convertImage(pickedImgsDir + imgData["filename"], outDir + otolId + ".jpg")
+		if not success:
 			quit()
 	else:
 		print(f"Processing {imgData['nodeName']}: {otolId}.jpg")
@@ -135,7 +146,8 @@ with open(imgListFile) as file:
 			continue
 		# Convert image
 		if genImgFiles:
-			if not convertImage(imgPath, outDir + otolId + ".jpg"):
+			success = convertImage(imgPath, outDir + otolId + ".jpg")
+			if not success:
 				break
 		else:
 			if iterNum % 1e4 == 0:
@@ -146,13 +158,13 @@ with open(imgListFile) as file:
 		imgName = os.path.basename(os.path.normpath(imgPath)) # Get last path component
 		imgName = os.path.splitext(imgName)[0] # Remove extension
 		if fromEol:
-			(eolId, _, contentId) = imgName.partition(" ")
-			(eolId, contentId) = (int(eolId), int(contentId))
+			eolId, _, contentId = imgName.partition(" ")
+			eolId, contentId = (int(eolId), int(contentId))
 			if (eolId, "eol") not in imgsDone:
 				query = "SELECT source_url, license, copyright_owner FROM images WHERE content_id = ?"
 				row = eolCur.execute(query, (contentId,)).fetchone()
 				if row == None:
-					print(f"ERROR: No image record for EOL ID {eolId}, content ID {contentId}", file=sys.stderr)
+					print(f"ERROR: No image record for EOL ID {eolId}, content ID {contentId}")
 					break
 				(url, license, owner) = row
 				dbCur.execute("INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)",
@@ -167,7 +179,7 @@ with open(imgListFile) as file:
 					" WHERE page_imgs.page_id = ?"
 				row = enwikiCur.execute(query, (enwikiId,)).fetchone()
 				if row == None:
-					print(f"ERROR: No image record for enwiki ID {enwikiId}", file=sys.stderr)
+					print(f"ERROR: No image record for enwiki ID {enwikiId}")
 					break
 				(name, license, artist, credit) = row
 				url = "https://en.wikipedia.org/wiki/File:" + urllib.parse.quote(name)
diff --git a/backend/data/genLinkedImgs.py b/backend/data/genLinkedImgs.py
index 9fe07a2..a8e1322 100755
--- a/backend/data/genLinkedImgs.py
+++ b/backend/data/genLinkedImgs.py
@@ -3,9 +3,12 @@
 import sys, re
 import sqlite3
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Adds a table to data.db, associating nodes without images to\n"
-usageInfo += "usable child images.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Looks for nodes without images in the database, and tries to
+associate them with images from their children.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
@@ -14,24 +17,22 @@ dbFile = "data.db"
 compoundNameRegex = re.compile(r"\[(.+) \+ (.+)]")
 upPropagateCompoundImgs = False
 
-# Open db
+print("Opening databases")
 dbCon = sqlite3.connect(dbFile)
 dbCur = dbCon.cursor()
 dbCur.execute("CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, otol_ids TEXT)")
-	# Associates a node with one (or two) otol-ids with usable images,
-	# encoded as 'otolId1' or 'otolId1,otolId2'
-# Get nodes with images
+
 print("Getting nodes with images")
 resolvedNodes = {} # Will map node names to otol IDs with a usable image
 query = "SELECT nodes.name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name"
 for (name, otolId) in dbCur.execute(query):
 	resolvedNodes[name] = otolId
-print(f"Got {len(resolvedNodes)} nodes")
-# Iterate through resolved nodes, resolving ancestors where able
-print("Resolving ancestor nodes")
-nodesToResolve = {}
-processedNodes = {}
-parentToChosenTips = {}
+print(f"Found {len(resolvedNodes)}")
+
+print("Iterating through nodes, trying to resolve images for ancestors")
+nodesToResolve = {} # Maps a node name to a list of objects that represent possible child images
+processedNodes = {} # Maps a node name to an OTOL ID, representing a child node whose image is to be used
+parentToChosenTips = {} # Used to prefer images from children with more tips
 iterNum = 0
 while len(resolvedNodes) > 0:
 	iterNum += 1
@@ -43,13 +44,13 @@ while len(resolvedNodes) > 0:
 	# Traverse upwards, resolving ancestors if able
 	while True:
 		# Get parent
-		row = dbCur.execute("SELECT node FROM edges WHERE child = ?", (nodeName,)).fetchone()
+		row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (nodeName,)).fetchone()
 		if row == None or row[0] in processedNodes or row[0] in resolvedNodes:
 			break
 		parent = row[0]
 		# Get parent data
 		if parent not in nodesToResolve:
-			childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE node = ?", (parent,))]
+			childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE parent = ?", (parent,))]
 			query = "SELECT name, tips FROM nodes WHERE name IN ({})".format(",".join(["?"] * len(childNames)))
 			childObjs = [{"name": row[0], "tips": row[1], "otolId": None} for row in dbCur.execute(query, childNames)]
 			childObjs.sort(key=lambda x: x["tips"], reverse=True)
@@ -66,7 +67,7 @@ while len(resolvedNodes) > 0:
 			nodeName = parent
 			continue
 		else:
-			# Add potential otol-id
+			# Mark child as a potential choice
 			childObj = next(c for c in childObjs if c["name"] == nodeName)
 			childObj["otolId"] = otolId
 			break
@@ -78,8 +79,8 @@ while len(resolvedNodes) > 0:
 			parentToChosenTips[name] = childObj["tips"]
 			dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (name, childObj["otolId"]))
 		nodesToResolve.clear()
-# Iterate through processed nodes with compound names
-print("Replacing images for compound-name nodes")
+
+print("Replacing linked-images for compound nodes")
 iterNum = 0
 for nodeName in processedNodes.keys():
 	iterNum += 1
@@ -106,7 +107,7 @@ for nodeName in processedNodes.keys():
 		if upPropagateCompoundImgs:
 			while True:
 				# Get parent
-				row = dbCur.execute("SELECT node FROM edges WHERE child = ?", (nodeName,)).fetchone()
+				row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (nodeName,)).fetchone()
 				if row != None:
 					parent = row[0]
 					# Check num tips
@@ -118,6 +119,7 @@ for nodeName in processedNodes.keys():
 						nodeName = parent
 						continue
 				break
-# Close db
+
+print("Closing databases")
 dbCon.commit()
 dbCon.close()
diff --git a/backend/data/genOtolData.py b/backend/data/genOtolData.py
index 87b35c3..36b6197 100755
--- a/backend/data/genOtolData.py
+++ b/backend/data/genOtolData.py
@@ -3,29 +3,33 @@
 import sys, re, os
 import json, sqlite3
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Reads labelled_supertree_ottnames.tre & annotations.json (from an Open Tree of Life release),\n"
-usageInfo += "and creates a sqlite database, which holds entries of the form (name text, data text).\n"
-usageInfo += "Each row holds a tree-of-life node's name, JSON-encoded child name array, a parent name or '',\n"
-usageInfo += "number of descendant 'tips', and a 1 or 0 indicating phylogenetic-support.\n"
-usageInfo += "\n"
-usageInfo += "Expected labelled_supertree_ottnames.tre format:\n"
-usageInfo += "    Represents a tree-of-life in Newick format, roughly like (n1,n2,(n3,n4)n5)n6,\n"
-usageInfo += "    where root node is named n6, and has children n1, n2, and n5.\n"
-usageInfo += "    Name forms include Homo_sapiens_ott770315, mrcaott6ott22687, and 'Oxalis san-miguelii ott5748753'\n"
-usageInfo += "    Some names can be split up into a 'simple' name (like Homo_sapiens) and an id (like ott770315)\n"
-usageInfo += "Expected annotations.json format:\n"
-usageInfo += "    JSON object holding information about the tree-of-life release.\n"
-usageInfo += "    The object's 'nodes' field maps node IDs to objects holding information about that node,\n"
-usageInfo += "    such as phylogenetic trees that support/conflict with it's placement.\n"
-usageInfo += "\n"
-usageInfo += "Some node trimming is done on the extracted tree, for performance and relevance reasons.\n"
-usageInfo += "The app can get quite laggy when some nodes in the chain have over 10k children.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads files describing a tree-of-life from an 'Open Tree of Life' release,
+and stores tree information in a database.
+
+Reads a labelled_supertree_ottnames.tre file, which is assumed to have this format:
+    The tree-of-life is represented in Newick format, which looks like: (n1,n2,(n3,n4)n5)n6
+		The root node is named n6, and has children n1, n2, and n5.
+    Name examples include: Homo_sapiens_ott770315, mrcaott6ott22687, 'Oxalis san-miguelii ott5748753'.
+		'ott770315' and 'mrcaott6ott22687' are node IDs. The latter is for a 'compound node'.
+		The node with ID 'ott770315' will get the name 'homo sapiens'.
+		A compound node will get a name composed from its sub-nodes (eg: [name1 + name2]).
+	It is possible for multiple nodes to have the same name.
+		In these cases, extra nodes will be named sequentially, as 'name1 [2]', 'name1 [3]', etc.
+Reads an annotations.json file, which is assumed to have this format:
+    Holds a JSON object, whose 'nodes' property maps node IDs to objects holding information about that node,
+    such as the properties 'supported_by' and 'conflicts_with', which list phylogenetic trees that
+	support/conflict with the node's placement.
+Reads from a picked-names file, if present, which specifies name and node ID pairs.
+	These help resolve cases where multiple nodes share the same name.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
 
-treeFile = "otol/labelled_supertree_ottnames.tre"
+treeFile = "otol/labelled_supertree_ottnames.tre" # Had about 2.5e9 nodes
 annFile = "otol/annotations.json"
 dbFile = "data.db"
 nodeMap = {} # Maps node IDs to node objects
@@ -33,19 +37,32 @@ nameToFirstId = {} # Maps node names to first found ID (names might have multipl
 dupNameToIds = {} # Maps names of nodes with multiple IDs to those IDs
 pickedNamesFile = "pickedOtolNames.txt"
 
-# Parse treeFile
+class Node:
+	" Represents a tree-of-life node "
+	def __init__(self, name, childIds, parentId, tips, pSupport):
+		self.name = name
+		self.childIds = childIds
+		self.parentId = parentId
+		self.tips = tips
+		self.pSupport = pSupport
+
 print("Parsing tree file")
+# Read file
 data = None
 with open(treeFile) as file:
 	data = file.read()
 dataIdx = 0
+# Parse content
+iterNum = 0
 def parseNewick():
-	"""Parses a node using 'data' and 'dataIdx', updates nodeMap accordingly, and returns the node name or None"""
-	global dataIdx
+	" Parses a node using 'data' and 'dataIdx', updates nodeMap accordingly, and returns the node's ID "
+	global data, dataIdx, iterNum
+	iterNum += 1
+	if iterNum % 1e5 == 0:
+		print(f"At iteration {iterNum}")
 	# Check for EOF
 	if dataIdx == len(data):
-		print("ERROR: Unexpected EOF at index " + str(dataIdx), file=sys.stderr)
-		return None
+		raise Exception(f"ERROR: Unexpected EOF at index {dataIdx}")
 	# Check for node
 	if data[dataIdx] == "(": # parse inner node
 		dataIdx += 1
@@ -53,12 +70,9 @@ def parseNewick():
 		while True:
 			# Read child
 			childId = parseNewick()
-			if childId == None:
-				return None
 			childIds.append(childId)
 			if (dataIdx == len(data)):
-				print("ERROR: Unexpected EOF", file=sys.stderr)
-				return None
+				raise Exception(f"ERROR: Unexpected EOF at index {dataIdx}")
 			# Check for next child
 			if (data[dataIdx] == ","):
 				dataIdx += 1
@@ -66,33 +80,25 @@ def parseNewick():
 			else:
 				# Get node name and id
 				dataIdx += 1 # Consume an expected ')'
-				[name, id] = parseNewickName()
+				name, id = parseNewickName()
 				updateNameMaps(name, id)
 				# Get child num-tips total
 				tips = 0
 				for childId in childIds:
-					tips += nodeMap[childId]["tips"]
+					tips += nodeMap[childId].tips
 				# Add node to nodeMap
-				nodeMap[id] = {"name": name, "children": childIds, "parent": None, "tips": tips, "pSupport": False}
+				nodeMap[id] = Node(name, childIds, None, tips, False)
 				# Update childrens' parent reference
 				for childId in childIds:
-					nodeMap[childId]["parent"] = id
+					nodeMap[childId].parentId = id
 				return id
 	else: # Parse node name
-		[name, id] = parseNewickName()
+		name, id = parseNewickName()
 		updateNameMaps(name, id)
-		nodeMap[id] = {"name": name, "children": [], "parent": None, "tips": 1, "pSupport": False}
+		nodeMap[id] = Node(name, [], None, 1, False)
 		return id
-def updateNameMaps(name, id):
-	if name not in nameToFirstId:
-		nameToFirstId[name] = id
-	else:
-		if name not in dupNameToIds:
-			dupNameToIds[name] = [nameToFirstId[name], id]
-		else:
-			dupNameToIds[name].append(id)
 def parseNewickName():
-	"""Helper that parses an input node name, and returns a [name,id] pair"""
+	" Parses a node name using 'data' and 'dataIdx', and returns a (name, id) pair "
 	global data, dataIdx
 	name = None
 	end = dataIdx
@@ -102,7 +108,7 @@ def parseNewickName():
 		inQuote = True
 		while end < len(data):
 			if (data[end] == "'"):
-				if end + 1 < len(data) and data[end+1] == "'": # Account for '' as escaped-quote
+				if end + 1 < len(data) and data[end + 1] == "'": # Account for '' as escaped-quote
 					end += 2
 					continue
 				else:
@@ -111,75 +117,86 @@ def parseNewickName():
 					break
 			end += 1
 		if inQuote:
-			raise Exception("ERROR: Unexpected EOF")
+			raise Exception(f"ERROR: Unexpected EOF at index {dataIdx}")
 		name = data[dataIdx:end]
 		dataIdx = end
 	else:
 		while end < len(data) and not re.match(r"[(),]", data[end]):
 			end += 1
 		if (end == dataIdx):
-			raise Exception("ERROR: Unexpected EOF")
+			raise Exception(f"ERROR: Unexpected EOF at index {dataIdx}")
 		name = data[dataIdx:end].rstrip()
 		if end == len(data): # Ignore trailing input semicolon
 			name = name[:-1]
 		dataIdx = end
-	# Convert to [name, id]
+	# Convert to (name, id)
 	name = name.lower()
 	if name.startswith("mrca"):
-		return [name, name]
+		return (name, name)
 	elif name[0] == "'":
 		match = re.fullmatch(r"'([^\\\"]+) (ott\d+)'", name)
 		if match == None:
 			raise Exception(f"ERROR: invalid name \"{name}\"")
 		name = match.group(1).replace("''", "'")
-		return [name, match.group(2)]
+		return (name, match.group(2))
 	else:
 		match = re.fullmatch(r"([^\\\"]+)_(ott\d+)", name)
 		if match == None:
 			raise Exception(f"ERROR: invalid name \"{name}\"")
-		return [match.group(1).replace("_", " "), match.group(2)]
+		return (match.group(1).replace("_", " "), match.group(2))
+def updateNameMaps(name, id):
+	global nameToFirstId, dupNameToIds
+	if name not in nameToFirstId:
+		nameToFirstId[name] = id
+	else:
+		if name not in dupNameToIds:
+			dupNameToIds[name] = [nameToFirstId[name], id]
+		else:
+			dupNameToIds[name].append(id)
 rootId = parseNewick()
-# Resolve duplicate names
-print("Resolving duplicates")
+
+print("Resolving duplicate names")
+# Read picked-names file
 nameToPickedId = {}
 if os.path.exists(pickedNamesFile):
 	with open(pickedNamesFile) as file:
 		for line in file:
 			(name, _, otolId) = line.rstrip().partition("|")
 			nameToPickedId[name] = otolId
-for [dupName, ids] in dupNameToIds.items():
+# Resolve duplicates
+for (dupName, ids) in dupNameToIds.items():
 	# Check for picked id
 	if dupName in nameToPickedId:
 		idToUse = nameToPickedId[dupName]
 	else:
 		# Get conflicting node with most tips
-		tipNums = [nodeMap[id]["tips"] for id in ids]
+		tipNums = [nodeMap[id].tips for id in ids]
 		maxIdx = tipNums.index(max(tipNums))
 		idToUse = ids[maxIdx]
 	# Adjust name of other conflicting nodes
 	counter = 2
 	for id in ids:
 		if id != idToUse:
-			nodeMap[id]["name"] += " [" + str(counter)+ "]"
+			nodeMap[id].name += f" [{counter}]"
 			counter += 1
-# Change mrca* names
+
 print("Changing mrca* names")
 def convertMrcaName(id):
 	node = nodeMap[id]
-	name = node["name"]
-	childIds = node["children"]
+	name = node.name
+	childIds = node.childIds
 	if len(childIds) < 2:
-		print(f"WARNING: MRCA node \"{name}\" has less than 2 children", file=sys.stderr)
+		print(f"WARNING: MRCA node \"{name}\" has less than 2 children")
 		return
 	# Get 2 children with most tips
-	childTips = [nodeMap[id]["tips"] for id in childIds]
-	maxIdx = childTips.index(max(childTips))
-	childTips[maxIdx] = 0
+	childTips = [nodeMap[id].tips for id in childIds]
+	maxIdx1 = childTips.index(max(childTips))
+	childTips[maxIdx1] = 0
 	maxIdx2 = childTips.index(max(childTips))
-	childId1 = childIds[maxIdx]
+	childId1 = childIds[maxIdx1]
 	childId2 = childIds[maxIdx2]
-	childName1 = nodeMap[childId1]["name"]
-	childName2 = nodeMap[childId2]["name"]
+	childName1 = nodeMap[childId1].name
+	childName2 = nodeMap[childId2].name
 	# Check for mrca* child names
 	if childName1.startswith("mrca"):
 		childName1 = convertMrcaName(childId1)
@@ -193,44 +210,44 @@ def convertMrcaName(id):
 	if match != None:
 		childName2 = match.group(1)
 	# Create composite name
-	node["name"] = f"[{childName1} + {childName2}]"
+	node.name = f"[{childName1} + {childName2}]"
 	return childName1
-for [id, node] in nodeMap.items():
-	if node["name"].startswith("mrca"):
+for (id, node) in nodeMap.items():
+	if node.name.startswith("mrca"):
 		convertMrcaName(id)
-# Parse annFile
+
 print("Parsing annotations file")
+# Read file
 data = None
 with open(annFile) as file:
 	data = file.read()
 obj = json.loads(data)
-nodeAnnsMap = obj['nodes']
-# Add annotations data
-print("Adding annotation data")
-for [id, node] in nodeMap.items():
+nodeAnnsMap = obj["nodes"]
+# Find relevant annotations
+for (id, node) in nodeMap.items():
 	# Set has-support value using annotations
 	if id in nodeAnnsMap:
 		nodeAnns = nodeAnnsMap[id]
 		supportQty = len(nodeAnns["supported_by"]) if "supported_by" in nodeAnns else 0
 		conflictQty = len(nodeAnns["conflicts_with"]) if "conflicts_with" in nodeAnns else 0
-		node["pSupport"] = supportQty > 0 and conflictQty == 0
+		node.pSupport = supportQty > 0 and conflictQty == 0
 	# Root node gets support
-	if node["parent"] == None:
-		node["pSupport"] = True
-# Create db
+	if node.parentId == None:
+		node.pSupport = True
+
 print("Creating nodes and edges tables")
 dbCon = sqlite3.connect(dbFile)
 dbCur = dbCon.cursor()
 dbCur.execute("CREATE TABLE nodes (name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT)")
 dbCur.execute("CREATE INDEX nodes_idx_nc ON nodes(name COLLATE NOCASE)")
-dbCur.execute("CREATE TABLE edges (node TEXT, child TEXT, p_support INT, PRIMARY KEY (node, child))")
+dbCur.execute("CREATE TABLE edges (parent TEXT, child TEXT, p_support INT, PRIMARY KEY (parent, child))")
 dbCur.execute("CREATE INDEX edges_child_idx ON edges(child)")
 for (otolId, node) in nodeMap.items():
-	dbCur.execute("INSERT INTO nodes VALUES (?, ?, ?)", (node["name"], otolId, node["tips"]))
-	childIds = node["children"]
-	for childId in childIds:
+	dbCur.execute("INSERT INTO nodes VALUES (?, ?, ?)", (node.name, otolId, node.tips))
+	for childId in node.childIds:
 		childNode = nodeMap[childId]
 		dbCur.execute("INSERT INTO edges VALUES (?, ?, ?)",
-			(node["name"], childNode["name"], 1 if childNode["pSupport"] else 0))
+			(node.name, childNode.name, 1 if childNode.pSupport else 0))
+print("Closing database")
 dbCon.commit()
 dbCon.close()
diff --git a/backend/data/genReducedTreeData.py b/backend/data/genReducedTreeData.py
index b475794..2e56bba 100755
--- a/backend/data/genReducedTreeData.py
+++ b/backend/data/genReducedTreeData.py
@@ -3,123 +3,131 @@
 import sys, os.path, re
 import json, sqlite3
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Reads \n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Creates a reduced version of the tree in the database.
+Reads a subset of the node names from a file, and creates a
+minimal tree that contains them, possibly with a few extras.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
 
 dbFile = "data.db"
-nodeNamesFile = "reducedTreeNodes.txt"
+nodeNamesFile = "pickedReducedNodes.txt"
 minimalNames = set()
 nodeMap = {} # Maps node names to node objects
 PREF_NUM_CHILDREN = 3 # Attempt inclusion of children up to this limit
-compNameRegex = re.compile(r"\[.+ \+ .+]")
+compNameRegex = re.compile(r"\[.+ \+ .+]") # Used to recognise composite nodes
+
+class Node:
+	" Represents a node from the database "
+	def __init__(self, id, children, parent, tips, pSupport):
+		self.id = id
+		self.children = children
+		self.parent = parent
+		self.tips = tips
+		self.pSupport = pSupport
 
-# Connect to db
+print("Opening database")
 dbCon = sqlite3.connect(dbFile)
 dbCur = dbCon.cursor()
-# Read in minimal set of node names
+
 print("Getting minimal name set")
 iterNum = 0
 with open(nodeNamesFile) as file:
 	for line in file:
 		iterNum += 1
 		if iterNum % 100 == 0:
-			print(f"Iteration {iterNum}")
+			print(f"At iteration {iterNum}")
 		#
-		row = dbCur.execute("SELECT name from nodes WHERE name = ?", (line.rstrip(),)).fetchone()
+		name = line.rstrip()
+		row = dbCur.execute("SELECT name from nodes WHERE name = ?", (name,)).fetchone()
 		if row == None:
-			row = dbCur.execute("SELECT name from names WHERE alt_name = ?", (line.rstrip(),)).fetchone()
+			row = dbCur.execute("SELECT name from names WHERE alt_name = ?", (name,)).fetchone()
 		if row != None:
 			minimalNames.add(row[0])
 if len(minimalNames) == 0:
-	print("ERROR: No names found", file=sys.stderr)
-	sys.exit(1)
-print(f"Name set has {len(minimalNames)} names")
-# Add nodes that connect up to root
-print("Getting connected nodes set")
-iterNum = 0
+	print("No names found")
+	sys.exit(0)
+print(f"Result has {len(minimalNames)} names")
+
+print("Getting ancestor nodes")
 rootName = None
+iterNum = 0
 for name in minimalNames:
 	iterNum += 1
 	if iterNum % 100 == 0:
-		print(f"Iteration {iterNum}")
+		print(f"At iteration {iterNum}")
 	#
 	prevName = None
 	while name != None:
 		if name not in nodeMap:
 			(id, tips) = dbCur.execute("SELECT id, tips from nodes where name = ?", (name,)).fetchone()
-			row = dbCur.execute("SELECT node, p_support from edges where child = ?", (name,)).fetchone()
+			row = dbCur.execute("SELECT parent, p_support from edges where child = ?", (name,)).fetchone()
 			parent = None if row == None or row[0] == "" else row[0]
-			pSupport = 1 if row == None or row[1] == 1 else 0
-			nodeMap[name] = {
-				"id": id,
-				"children": [] if prevName == None else [prevName],
-				"parent": parent,
-				"tips": 0,
-				"pSupport": pSupport,
-			}
+			pSupport = row == None or row[1] == 1
+			children = [] if prevName == None else [prevName]
+			nodeMap[name] = Node(id, children, parent, 0, pSupport)
 			prevName = name
 			name = parent
 		else:
 			if prevName != None:
-				nodeMap[name]["children"].append(prevName)
+				nodeMap[name].children.append(prevName)
 			break
 	if name == None:
 		rootName = prevName
-print(f"New node set has {len(nodeMap)} nodes")
-# Merge-upward compsite-named nodes
-print("Merging-upward composite-named nodes")
+print(f"Result has {len(nodeMap)} nodes")
+
+print("Merging-upward composite nodes")
 namesToRemove = set()
-for (name, nodeObj) in nodeMap.items():
-	parent = nodeObj["parent"]
+for (name, node) in nodeMap.items():
+	parent = node.parent
 	if parent != None and compNameRegex.fullmatch(name) != None:
 		# Connect children to parent
-		nodeMap[parent]["children"].remove(name)
-		nodeMap[parent]["children"].extend(nodeObj["children"])
-		for n in nodeObj["children"]:
-			nodeMap[n]["parent"] = parent
-			nodeMap[n]["pSupport"] &= nodeObj["pSupport"]
+		nodeMap[parent].children.remove(name)
+		nodeMap[parent].children.extend(node.children)
+		for n in node.children:
+			nodeMap[n].parent = parent
+			nodeMap[n].pSupport &= node.pSupport
 		# Remember for removal
 		namesToRemove.add(name)
 for name in namesToRemove:
 	del nodeMap[name]
-print(f"New node set has {len(nodeMap)} nodes")
-# Remove certain 'chain collapsible' nodes
+print(f"Result has {len(nodeMap)} nodes")
+
 print("Removing 'chain collapsible' nodes")
 namesToRemove2 = set()
-for (name, nodeObj) in nodeMap.items():
-	hasOneChild = len(nodeObj["children"]) == 1
-	isOnlyChild = nodeObj["parent"] != None and len(nodeMap[nodeObj["parent"]]["children"]) == 1
+for (name, node) in nodeMap.items():
+	hasOneChild = len(node.children) == 1
+	isOnlyChild = node.parent != None and len(nodeMap[node.parent].children) == 1
 	if name not in minimalNames and (hasOneChild or isOnlyChild):
-		parentName = nodeObj["parent"]
-		children = nodeObj["children"]
+		parent = node.parent
 		# Connect parent and children
-		nodeMap[parentName]["children"].remove(name)
-		nodeMap[parentName]["children"].extend(children)
-		for n in children:
-			nodeMap[n]["parent"] = parentName
-			# Adjust child pSupport
-			nodeMap[n]["pSupport"] &= nodeObj["pSupport"]
+		nodeMap[parent].children.remove(name)
+		nodeMap[parent].children.extend(node.children)
+		for n in node.children:
+			nodeMap[n].parent = parent
+			nodeMap[n].pSupport &= node.pSupport
 		# Remember for removal
 		namesToRemove2.add(name)
 for name in namesToRemove2:
 	del nodeMap[name]
 	namesToRemove.add(name)
-print(f"New node set has {len(nodeMap)} nodes")
-# Add some connected children
-print("Adding additional nearby children")
+print(f"Result has {len(nodeMap)} nodes")
+
+print("Adding some additional nearby children")
 namesToAdd = []
 iterNum = 0
-for (name, nodeObj) in nodeMap.items():
+for (name, node) in nodeMap.items():
 	iterNum += 1
 	if iterNum % 100 == 0:
-		print(f"Iteration {iterNum}")
+		print(f"At iteration {iterNum}")
 	#
-	numChildren = len(nodeObj["children"])
+	numChildren = len(node.children)
 	if numChildren < PREF_NUM_CHILDREN:
-		children = [row[0] for row in dbCur.execute("SELECT child FROM edges where node = ?", (name,))]
+		children = [row[0] for row in dbCur.execute("SELECT child FROM edges where parent = ?", (name,))]
 		newChildren = []
 		for n in children:
 			if n in nodeMap or n in namesToRemove:
@@ -132,43 +140,38 @@ for (name, nodeObj) in nodeMap.items():
 				continue
 			newChildren.append(n)
 		newChildNames = newChildren[:max(0, PREF_NUM_CHILDREN - numChildren)]
-		nodeObj["children"].extend(newChildNames)
+		node.children.extend(newChildNames)
 		namesToAdd.extend(newChildNames)
 for name in namesToAdd:
-	(parent, pSupport) = dbCur.execute("SELECT node, p_support from edges WHERE child = ?", (name,)).fetchone()
+	parent, pSupport = dbCur.execute("SELECT parent, p_support from edges WHERE child = ?", (name,)).fetchone()
 	(id,) = dbCur.execute("SELECT id FROM nodes WHERE name = ?", (name,)).fetchone()
 	parent = None if parent == "" else parent
-	nodeMap[name] = {
-		"id": id,
-		"children": [],
-		"parent": parent,
-		"tips": 0,
-		"pSupport": pSupport == 1,
-	}
-print(f"New node set has {len(nodeMap)} nodes")
-# set tips vals
-print("Setting tips vals")
+	nodeMap[name] = Node(id, [], parent, 0, pSupport == 1)
+print(f"Result has {len(nodeMap)} nodes")
+
+print("Setting 'tips' values")
 def setTips(nodeName):
-	nodeObj = nodeMap[nodeName]
-	if len(nodeObj["children"]) == 0:
-		nodeObj["tips"] = 1
+	node = nodeMap[nodeName]
+	if len(node.children) == 0:
+		node.tips = 1
 		return 1
-	tips = sum([setTips(childName) for childName in nodeObj["children"]])
-	nodeObj["tips"] = tips
+	tips = sum([setTips(childName) for childName in node.children])
+	node.tips = tips
 	return tips
 setTips(rootName)
-# Add new nodes to db
-print("Adding to db")
+
+print("Adding reduced tree to database")
 dbCur.execute("CREATE TABLE r_nodes (name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT)")
 dbCur.execute("CREATE INDEX r_nodes_idx_nc ON r_nodes(name COLLATE NOCASE)")
-dbCur.execute("CREATE TABLE r_edges (node TEXT, child TEXT, p_support INT, PRIMARY KEY (node, child))")
+dbCur.execute("CREATE TABLE r_edges (parent TEXT, child TEXT, p_support INT, PRIMARY KEY (parent, child))")
 dbCur.execute("CREATE INDEX r_edges_child_idx ON r_edges(child)")
-for (name, nodeObj) in nodeMap.items():
-	parentName = "" if nodeObj["parent"] == None else nodeObj["parent"]
-	dbCur.execute("INSERT INTO r_nodes VALUES (?, ?, ?)", (name, nodeObj["id"], nodeObj["tips"]))
-	for childName in nodeObj["children"]:
-		pSupport = 1 if nodeMap[childName]["pSupport"] else 0
+for (name, node) in nodeMap.items():
+	parentName = "" if node.parent == None else node.parent
+	dbCur.execute("INSERT INTO r_nodes VALUES (?, ?, ?)", (name, node.id, node.tips))
+	for childName in node.children:
+		pSupport = 1 if nodeMap[childName].pSupport else 0
 		dbCur.execute("INSERT INTO r_edges VALUES (?, ?, ?)", (name, childName, pSupport))
-# Close db
+
+print("Closing database")
 dbCon.commit()
 dbCon.close()
diff --git a/backend/data/reviewImgsToGen.py b/backend/data/reviewImgsToGen.py
index 4d970ba..de592f5 100755
--- a/backend/data/reviewImgsToGen.py
+++ b/backend/data/reviewImgsToGen.py
@@ -7,15 +7,18 @@ from tkinter import ttk
 import PIL
 from PIL import ImageTk, Image, ImageOps
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Provides a GUI that displays, for each tol-node, an associated image from\n"
-usageInfo += "eol/* and enwiki/*, and enables the user to choose which to use. Writes\n"
-usageInfo += "choice data to a text file with lines of the form 'otolId1 imgPath1', or\n"
-usageInfo += "'otolId1', where no path indicates a choice of no image.\n"
-usageInfo += "\n"
-usageInfo += "The program can be closed, and run again to continue from the last choice.\n"
-usageInfo += "The program looks for an existing output file to determine what choices\n"
-usageInfo += "have already been made.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Provides a GUI that displays, for each node in the database, associated
+images from EOL and Wikipedia, and allows choosing which to use. Writes
+choice data to a text file with lines of the form 'otolId1 imgPath1', or
+'otolId1', where no path indicates a choice of no image.
+
+The program can be closed, and run again to continue from the last choice.
+The program looks for an existing output file to determine what choices
+have already been made.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
@@ -28,16 +31,18 @@ IMG_DISPLAY_SZ = 400
 PLACEHOLDER_IMG = Image.new("RGB", (IMG_DISPLAY_SZ, IMG_DISPLAY_SZ), (88, 28, 135))
 onlyReviewPairs = True
 
-# Open db
+print("Opening database")
 dbCon = sqlite3.connect(dbFile)
 dbCur = dbCon.cursor()
-# Associate nodes with images
-nodeToImgs = {} # Maps otol-ids to img-path arrays
-print("Looking through EOL images")
+
+nodeToImgs = {} # Maps otol-ids to arrays of image paths
+print("Iterating through images from EOL")
 if os.path.exists(eolImgDir):
 	for filename in os.listdir(eolImgDir):
-		(eolId, _, _) = filename.partition(" ")
+		# Get associated EOL ID
+		eolId, _, _ = filename.partition(" ")
 		query = "SELECT nodes.id FROM nodes INNER JOIN eol_ids ON nodes.name = eol_ids.name WHERE eol_ids.id = ?"
+		# Get associated node IDs
 		found = False
 		for (otolId,) in dbCur.execute(query, (int(eolId),)):
 			if otolId not in nodeToImgs:
@@ -45,13 +50,15 @@ if os.path.exists(eolImgDir):
 			nodeToImgs[otolId].append(eolImgDir + filename)
 			found = True
 		if not found:
-			print(f"No node found for {eolImgDir}{filename}", file=sys.stderr)
-print(f"Result has {len(nodeToImgs)} node entries")
-print("Looking through enwiki images")
+			print(f"WARNING: No node found for {eolImgDir}{filename}")
+print(f"Result: {len(nodeToImgs)} nodes with images")
+print("Iterating through images from Wikipedia")
 if os.path.exists(enwikiImgDir):
 	for filename in os.listdir(enwikiImgDir):
+		# Get associated page ID
 		(wikiId, _, _) = filename.partition(".")
-		query = "SELECT nodes.id FROM nodes INNER JOIN wiki_ids ON nodes.name = wiki_ids.name WHERE wiki_ids._id = ?"
+		# Get associated node IDs
+		query = "SELECT nodes.id FROM nodes INNER JOIN wiki_ids ON nodes.name = wiki_ids.name WHERE wiki_ids.id = ?"
 		found = False
 		for (otolId,) in dbCur.execute(query, (int(wikiId),)):
 			if otolId not in nodeToImgs:
@@ -59,10 +66,9 @@ if os.path.exists(enwikiImgDir):
 			nodeToImgs[otolId].append(enwikiImgDir + filename)
 			found = True
 		if not found:
-			print(f"No node found for {enwikiImgDir}{filename}", file=sys.stderr)
-print(f"Result has {len(nodeToImgs)} node entries")
-# Check for already-made choices
-print("Filtering out already-chosen IDs")
+			print(f"WARNING: No node found for {enwikiImgDir}{filename}")
+print(f"Result: {len(nodeToImgs)} nodes with images")
+print("Filtering out already-made image choices")
 oldSz = len(nodeToImgs)
 if os.path.exists(outFile):
 	with open(outFile) as file:
@@ -74,7 +80,7 @@ if os.path.exists(outFile):
 print(f"Filtered out {oldSz - len(nodeToImgs)} entries")
 
 class ImgReviewer:
-	""" Provides the GUI for reviewing images """
+	" Provides the GUI for reviewing images "
 	def __init__(self, root, nodeToImgs):
 		self.root = root
 		root.title("Image Reviewer")
@@ -96,7 +102,7 @@ class ImgReviewer:
 		# Add padding
 		for child in mainFrame.winfo_children():
 			child.grid_configure(padx=5, pady=5)
-		# Add bindings
+		# Add keyboard bindings
 		root.bind("<q>", self.quit)
 		root.bind("<Key-j>", lambda evt: self.accept(0))
 		root.bind("<Key-k>", lambda evt: self.accept(1))
@@ -112,7 +118,7 @@ class ImgReviewer:
 		# Initialise images to review
 		self.getNextImgs()
 	def getNextImgs(self):
-		""" Updates display with new images to review, or ends program """
+		" Updates display with new images to review, or ends program "
 		# Get next image paths
 		while True:
 			self.listIdx += 1
@@ -120,7 +126,7 @@ class ImgReviewer:
 				print("No more images to review. Exiting program.")
 				self.quit()
 				return
-			(self.otolId, imgPaths) = self.nodeImgsList[self.listIdx]
+			self.otolId, imgPaths = self.nodeImgsList[self.listIdx]
 			# Potentially skip user choice
 			if onlyReviewPairs and len(imgPaths) == 1:
 				with open(outFile, 'a') as file:
@@ -141,12 +147,12 @@ class ImgReviewer:
 				continue
 			if imgPath.startswith("eol/"):
 				self.eolImgPath = imgPath
-				self.eolImg = ImageTk.PhotoImage(self.resizeForDisplay(img))
+				self.eolImg = ImageTk.PhotoImage(self.resizeImgForDisplay(img))
 			elif imgPath.startswith("enwiki/"):
 				self.enwikiImgPath = imgPath
-				self.enwikiImg = ImageTk.PhotoImage(self.resizeForDisplay(img))
+				self.enwikiImg = ImageTk.PhotoImage(self.resizeImgForDisplay(img))
 			else:
-				print(f"Unexpected image path {imgPath}", file=sys.stderr)
+				print(f"Unexpected image path {imgPath}")
 				self.quit()
 				return
 		# Re-iterate if all image paths invalid
@@ -157,14 +163,14 @@ class ImgReviewer:
 			return
 		# Add placeholder images
 		if self.eolImgPath == None:
-			self.eolImg = ImageTk.PhotoImage(self.resizeForDisplay(PLACEHOLDER_IMG))
+			self.eolImg = ImageTk.PhotoImage(self.resizeImgForDisplay(PLACEHOLDER_IMG))
 		elif self.enwikiImgPath == None:
-			self.enwikiImg = ImageTk.PhotoImage(self.resizeForDisplay(PLACEHOLDER_IMG))
+			self.enwikiImg = ImageTk.PhotoImage(self.resizeImgForDisplay(PLACEHOLDER_IMG))
 		# Update image-frames
 		self.labels[0].config(image=self.eolImg)
 		self.labels[1].config(image=self.enwikiImg)
 		# Update title
-		title = f"Imgs for otol ID {self.otolId}"
+		title = f"Images for otol ID {self.otolId}"
 		query = "SELECT names.alt_name FROM" \
 			" nodes INNER JOIN names ON nodes.name = names.name" \
 			" WHERE nodes.id = ? and pref_alt = 1"
@@ -174,7 +180,7 @@ class ImgReviewer:
 		title += f" ({self.listIdx + 1} out of {len(self.nodeImgsList)})"
 		self.root.title(title)
 	def accept(self, imgIdx):
-		""" React to a user selecting an image """
+		" React to a user selecting an image "
 		imgPath = self.eolImgPath if imgIdx == 0 else self.enwikiImgPath
 		if imgPath == None:
 			print("Invalid selection")
@@ -184,12 +190,13 @@ class ImgReviewer:
 		self.numReviewed += 1
 		self.getNextImgs()
 	def reject(self):
-		""" React to a user rejecting all images of a set """
+		" React to a user rejecting all images of a set "
 		with open(outFile, 'a') as file:
 			file.write(f"{self.otolId}\n")
 		self.numReviewed += 1
 		self.getNextImgs()
 	def quit(self, e = None):
+		global dbCon
 		print(f"Number reviewed: {self.numReviewed}")
 		timeElapsed = time.time() - self.startTime
 		print(f"Time elapsed: {timeElapsed:.2f} seconds")
@@ -197,8 +204,8 @@ class ImgReviewer:
 			print(f"Avg time per review: {timeElapsed/self.numReviewed:.2f} seconds")
 		dbCon.close()
 		self.root.destroy()
-	def resizeForDisplay(self, img):
-		""" Returns a copy of an image, shrunk to fit the display (keeps aspect ratio), and with a background """
+	def resizeImgForDisplay(self, img):
+		" Returns a copy of an image, shrunk to fit its frame (keeps aspect ratio), and with a background "
 		if max(img.width, img.height) > IMG_DISPLAY_SZ:
 			if (img.width > img.height):
 				newHeight = int(img.height * IMG_DISPLAY_SZ/img.width)
@@ -212,6 +219,7 @@ class ImgReviewer:
 			int((IMG_DISPLAY_SZ - img.height) / 2)))
 		return bgImg
 # Create GUI and defer control
+print("Starting GUI")
 root = tki.Tk()
 ImgReviewer(root, nodeToImgs)
 root.mainloop()
diff --git a/backend/data/trimTree.py b/backend/data/trimTree.py
index 302ea0d..fa269d8 100755
--- a/backend/data/trimTree.py
+++ b/backend/data/trimTree.py
@@ -3,21 +3,25 @@
 import sys
 import sqlite3
 
-usageInfo =  f"usage: {sys.argv[0]}\n"
-usageInfo += "Removes certain children from a tol-tree in an sqlite db.\n"
-usageInfo += "Looks for nodes with an amount of children above a threshold,\n"
-usageInfo += "and removes the excess, excluding those with 'significant'\n"
-usageInfo += "associations, like those with descriptions and images.\n"
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Tries to remove 'low significance' nodes from the database. Currently
+removes nodes that don't have an image or description, or a presence in
+the reduced tree. Also, for nodes with 'many' children, trims some more,
+ignoring the presence of node descriptions.
+"""
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
 
 dbFile = "data.db"
-softChildLimit = 500
+softChildLimit = 500 # Used to determine when a node has 'many' children
 
+print("Opening database")
 dbCon = sqlite3.connect(dbFile)
 dbCur = dbCon.cursor()
-# Get nodes that shouldn't be deleted, along with their ancestors
+
 print("Finding nodes to keep")
 nodesToKeep = set()
 nodesToStronglyKeep = set()
@@ -41,25 +45,26 @@ for name in nodesToKeep:
 		print(f"\tAt iteration {iterNum}")
 	#
 	while True:
-		row = dbCur.execute("SELECT node FROM edges WHERE child = ?", (name,)).fetchone()
+		row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (name,)).fetchone()
 		if row != None:
 			parent = row[0]
 			if parent not in nodesToKeep and parent not in ancestors:
 				ancestors.add(parent)
-				if name in nodesToStronglyKeep:
+				if name in nodesToStronglyKeep: # Strongly-kept status propagates up to ancestors
 					nodesToStronglyKeep.add(parent)
 				name = parent
 				continue
 		break
 nodesToKeep.update(ancestors)
-print(f"Total of {len(nodesToKeep)} nodes to keep")
+print(f"Result: {len(nodesToKeep)} nodes to keep")
+
 # Find root node
-query = "SELECT name FROM nodes LEFT JOIN edges ON nodes.name = edges.child WHERE edges.node IS NULL LIMIT 1"
+query = "SELECT name FROM nodes LEFT JOIN edges ON nodes.name = edges.child WHERE edges.parent IS NULL LIMIT 1"
 (rootName,) = dbCur.execute(query).fetchone()
-print(f"Found root node '{rootName}'")
-# Traverse tree, looking for trimmable nodes
+print(f"Found root node \"{rootName}\"")
+
 print("Looking for trimmable nodes")
-nodeToTipsChg = {}
+nodeToTipsChg = {} # Used to update 'tips' values after trimming
 nodesToDelete = set()
 iterNum = 0
 def findTrimmables(nodeName):
@@ -68,15 +73,15 @@ def findTrimmables(nodeName):
 	if iterNum % 1e4 == 0:
 		print(f"At iteration {iterNum}")
 	#
-	childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE node = ?", (nodeName,))]
+	childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE parent = ?", (nodeName,))]
 	childrenToKeep, otherChildren = set(), set()
 	for n in childNames:
 		if n in nodesToKeep:
 			childrenToKeep.add(n)
 		else:
 			otherChildren.add(n)
-	# Check soft limit
 	tipsRemoved = 0
+	# Check soft limit
 	if len(childrenToKeep) > softChildLimit:
 		numToTrim = len(childrenToKeep) - softChildLimit
 		# Try removing weakly-kept nodes, preferring those with less tips
@@ -88,7 +93,7 @@ def findTrimmables(nodeName):
 		candidatesToTrim.sort(key=lambda n: childToTips[n], reverse=True)
 		otherChildren.update(candidatesToTrim[-numToTrim:])
 		childrenToKeep.difference_update(candidatesToTrim[-numToTrim:])
-	# 'Simulate' deletions
+	# Mark nodes for deletion
 	for n in otherChildren:
 		tipsRemoved += markForDeletion(n)
 	# Recurse on children
@@ -99,7 +104,7 @@ def findTrimmables(nodeName):
 	return tipsRemoved
 def markForDeletion(nodeName):
 	nodesToDelete.add(nodeName)
-	childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE node = ?", (nodeName,))]
+	childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE parent = ?", (nodeName,))]
 	if len(childNames) == 0:
 		return 1
 	else:
@@ -108,7 +113,7 @@ def markForDeletion(nodeName):
 			tipsRemoved += markForDeletion(n)
 		return tipsRemoved
 findTrimmables(rootName)
-# Delete trimmable nodes
+
 print(f"Deleting {len(nodesToDelete)} nodes")
 iterNum = 0
 for nodeName in nodesToDelete:
@@ -117,10 +122,13 @@ for nodeName in nodesToDelete:
 		print(f"At iteration {iterNum}")
 	#
 	dbCur.execute("DELETE FROM nodes WHERE name = ?", (nodeName,))
-	dbCur.execute("DELETE FROM edges WHERE node = ?", (nodeName,))
+	dbCur.execute("DELETE FROM edges WHERE parent = ?", (nodeName,))
 	dbCur.execute("DELETE FROM edges WHERE child = ?", (nodeName,))
 	dbCur.execute("DELETE FROM names WHERE name = ?", (nodeName,))
-	dbCur.execute("DELETE FROM eol_ids WHERE name = ?", (nodeName,))
+	# Could also delete from 'eol_ids', 'wiki_ids', and 'descs', but this
+		# makes it much harder to restore the original data if needed, and
+		# the memory savings didn't seem significant.
+
 print(f"Updating num-tips for {len(nodeToTipsChg)} nodes")
 iterNum = 0
 for (nodeName, tipsChg) in nodeToTipsChg.items():
@@ -129,6 +137,7 @@ for (nodeName, tipsChg) in nodeToTipsChg.items():
 		print(f"At iteration {iterNum}")
 	#
 	dbCur.execute("UPDATE nodes SET tips = tips - ? WHERE name = ?", (tipsChg, nodeName))
-# Close db
+
+print("Closing database")
 dbCon.commit()
 dbCon.close()
diff --git a/backend/server.py b/backend/server.py
index 888f73a..4a364c3 100755
--- a/backend/server.py
+++ b/backend/server.py
@@ -28,7 +28,7 @@ if len(sys.argv) > 1:
 
 # Classes for objects sent as responses (matches lib.ts types in client-side code)
 class TolNode:
-	""" Used when responding to 'node' and 'chain' requests """
+	" Used when responding to 'node' and 'chain' requests "
 	def __init__(self, otolId, children, parent=None, tips=0, pSupport=False, commonName=None, imgName=None):
 		self.otolId = otolId         # string | null
 		self.children = children     # string[]
@@ -38,24 +38,24 @@ class TolNode:
 		self.commonName = commonName # null | string
 		self.imgName = imgName       # null | string | [string,string] | [null, string] | [string, null]
 class SearchSugg:
-	""" Represents a search suggestion """
+	" Represents a search suggestion "
 	def __init__(self, name, canonicalName=None):
 		self.name = name                   # string
 		self.canonicalName = canonicalName # string | null
 class SearchSuggResponse:
-	""" Sent as responses to 'search' requests """
+	" Sent as responses to 'search' requests "
 	def __init__(self, searchSuggs, hasMore):
 		self.suggs = searchSuggs # SearchSugg[]
 		self.hasMore = hasMore   # boolean
 class DescInfo:
-	""" Represents a tol-node's associated description """
+	" Represents a tol-node's associated description "
 	def __init__(self, text, wikiId, fromRedirect, fromDbp):
 		self.text = text                 # string
 		self.wikiId = wikiId             # number
 		self.fromRedirect = fromRedirect # boolean
 		self.fromDbp = fromDbp           # boolean
 class ImgInfo:
-	""" Represents a tol-node's associated image """
+	" Represents a tol-node's associated image "
 	def __init__(self, id, src, url, license, artist, credit):
 		self.id = id           # number
 		self.src = src         # string
@@ -64,7 +64,7 @@ class ImgInfo:
 		self.artist = artist   # string
 		self.credit = credit   # string
 class InfoResponse:
-	""" Sent as responses to 'info' requests """
+	" Sent as responses to 'info' requests "
 	def __init__(self, tolNode, descData, imgData):
 		self.tolNode = tolNode   # null | TolNode
 		self.descData = descData # null | DescInfo | [DescInfo, DescInfo]
@@ -84,7 +84,7 @@ def lookupNodes(names, useReducedTree):
 	for (nodeName, otolId, tips) in cur.execute(query, names):
 		nameToNodes[nodeName] = TolNode(otolId, [], tips=tips)
 	# Get child info
-	query = f"SELECT node, child FROM {edgesTable} WHERE node IN ({queryParamStr})"
+	query = f"SELECT parent, child FROM {edgesTable} WHERE parent IN ({queryParamStr})"
 	for (nodeName, childName) in cur.execute(query, names):
 		nameToNodes[nodeName].children.append(childName)
 	# Order children by tips
@@ -96,7 +96,7 @@ def lookupNodes(names, useReducedTree):
 			childToTips[n] = tips
 		node.children.sort(key=lambda n: childToTips[n], reverse=True)
 	# Get parent info
-	query = f"SELECT node, child, p_support FROM {edgesTable} WHERE child IN ({queryParamStr})"
+	query = f"SELECT parent, child, p_support FROM {edgesTable} WHERE child IN ({queryParamStr})"
 	for (nodeName, childName, pSupport) in cur.execute(query, names):
 		nameToNodes[childName].parent = nodeName
 		nameToNodes[childName].pSupport = (pSupport == 1)
author	Terry Truong <terry06890@gmail.com>	2022-06-22 23:16:42 +1000
committer	Terry Truong <terry06890@gmail.com>	2022-06-22 23:16:42 +1000
commit	abb936f5d76f7fe5cec1e8948d287da86643d504 (patch)
tree	f07b9eaadf5ae91363fdbac9d81b74e1fb0a436f
parent	e78c4df403e5f98afa08f7a0841ff233d5f6d05b (diff)