From 5fe71ea7b9d9a5d2dc6e8e5ce5b9193629eed74d Mon Sep 17 00:00:00 2001
From: Terry Truong <terry06890@gmail.com>
Date: Mon, 11 Jul 2022 01:54:08 +1000
Subject: Make backend dev server script serve the image files

Previously, image files in backend/data/img were moved to, or
symlinked from, public/. This needed to be changed before each
build, otherwise vite would end up copying gigabytes of images.
---
 .gitignore                                       |  52 ++--
 backend/data/README.md                           | 152 -----------
 backend/data/addPickedNames.py                   |  57 ----
 backend/data/dbpedia/README.md                   |  29 --
 backend/data/dbpedia/genDescData.py              | 130 ---------
 backend/data/enwiki/README.md                    |  52 ----
 backend/data/enwiki/downloadImgLicenseInfo.py    | 150 -----------
 backend/data/enwiki/downloadImgs.py              |  91 -------
 backend/data/enwiki/genDescData.py               | 127 ---------
 backend/data/enwiki/genDumpIndexDb.py            |  58 ----
 backend/data/enwiki/genImgData.py                | 190 -------------
 backend/data/enwiki/lookupPage.py                |  68 -----
 backend/data/eol/README.md                       |  26 --
 backend/data/eol/downloadImgs.py                 | 147 ----------
 backend/data/eol/genImagesListDb.sh              |  12 -
 backend/data/eol/reviewImgs.py                   | 205 --------------
 backend/data/genDbpData.py                       | 247 -----------------
 backend/data/genEnwikiDescData.py                | 102 -------
 backend/data/genEnwikiNameData.py                |  76 ------
 backend/data/genEolNameData.py                   | 184 -------------
 backend/data/genImgs.py                          | 191 -------------
 backend/data/genLinkedImgs.py                    | 125 ---------
 backend/data/genOtolData.py                      | 250 -----------------
 backend/data/genReducedTrees.py                  | 329 -----------------------
 backend/data/otol/README.md                      |  10 -
 backend/data/pickedImgs/README.md                |  10 -
 backend/data/reviewImgsToGen.py                  | 225 ----------------
 backend/server.py                                |  28 +-
 backend/tilo.py                                  |   2 +-
 backend/tolData/README.md                        | 152 +++++++++++
 backend/tolData/addPickedNames.py                |  57 ++++
 backend/tolData/dbpedia/README.md                |  29 ++
 backend/tolData/dbpedia/genDescData.py           | 130 +++++++++
 backend/tolData/enwiki/README.md                 |  52 ++++
 backend/tolData/enwiki/downloadImgLicenseInfo.py | 150 +++++++++++
 backend/tolData/enwiki/downloadImgs.py           |  91 +++++++
 backend/tolData/enwiki/genDescData.py            | 127 +++++++++
 backend/tolData/enwiki/genDumpIndexDb.py         |  58 ++++
 backend/tolData/enwiki/genImgData.py             | 190 +++++++++++++
 backend/tolData/enwiki/lookupPage.py             |  68 +++++
 backend/tolData/eol/README.md                    |  26 ++
 backend/tolData/eol/downloadImgs.py              | 147 ++++++++++
 backend/tolData/eol/genImagesListDb.sh           |  12 +
 backend/tolData/eol/reviewImgs.py                | 205 ++++++++++++++
 backend/tolData/genDbpData.py                    | 247 +++++++++++++++++
 backend/tolData/genEnwikiDescData.py             | 102 +++++++
 backend/tolData/genEnwikiNameData.py             |  76 ++++++
 backend/tolData/genEolNameData.py                | 184 +++++++++++++
 backend/tolData/genImgs.py                       | 191 +++++++++++++
 backend/tolData/genLinkedImgs.py                 | 125 +++++++++
 backend/tolData/genOtolData.py                   | 250 +++++++++++++++++
 backend/tolData/genReducedTrees.py               | 329 +++++++++++++++++++++++
 backend/tolData/otol/README.md                   |  10 +
 backend/tolData/pickedImgs/README.md             |  10 +
 backend/tolData/reviewImgsToGen.py               | 225 ++++++++++++++++
 src/lib.ts                                       |   6 +-
 vite.config.js                                   |   2 +-
 57 files changed, 3298 insertions(+), 3278 deletions(-)
 delete mode 100644 backend/data/README.md
 delete mode 100755 backend/data/addPickedNames.py
 delete mode 100644 backend/data/dbpedia/README.md
 delete mode 100755 backend/data/dbpedia/genDescData.py
 delete mode 100644 backend/data/enwiki/README.md
 delete mode 100755 backend/data/enwiki/downloadImgLicenseInfo.py
 delete mode 100755 backend/data/enwiki/downloadImgs.py
 delete mode 100755 backend/data/enwiki/genDescData.py
 delete mode 100755 backend/data/enwiki/genDumpIndexDb.py
 delete mode 100755 backend/data/enwiki/genImgData.py
 delete mode 100755 backend/data/enwiki/lookupPage.py
 delete mode 100644 backend/data/eol/README.md
 delete mode 100755 backend/data/eol/downloadImgs.py
 delete mode 100755 backend/data/eol/genImagesListDb.sh
 delete mode 100755 backend/data/eol/reviewImgs.py
 delete mode 100755 backend/data/genDbpData.py
 delete mode 100755 backend/data/genEnwikiDescData.py
 delete mode 100755 backend/data/genEnwikiNameData.py
 delete mode 100755 backend/data/genEolNameData.py
 delete mode 100755 backend/data/genImgs.py
 delete mode 100755 backend/data/genLinkedImgs.py
 delete mode 100755 backend/data/genOtolData.py
 delete mode 100755 backend/data/genReducedTrees.py
 delete mode 100644 backend/data/otol/README.md
 delete mode 100644 backend/data/pickedImgs/README.md
 delete mode 100755 backend/data/reviewImgsToGen.py
 create mode 100644 backend/tolData/README.md
 create mode 100755 backend/tolData/addPickedNames.py
 create mode 100644 backend/tolData/dbpedia/README.md
 create mode 100755 backend/tolData/dbpedia/genDescData.py
 create mode 100644 backend/tolData/enwiki/README.md
 create mode 100755 backend/tolData/enwiki/downloadImgLicenseInfo.py
 create mode 100755 backend/tolData/enwiki/downloadImgs.py
 create mode 100755 backend/tolData/enwiki/genDescData.py
 create mode 100755 backend/tolData/enwiki/genDumpIndexDb.py
 create mode 100755 backend/tolData/enwiki/genImgData.py
 create mode 100755 backend/tolData/enwiki/lookupPage.py
 create mode 100644 backend/tolData/eol/README.md
 create mode 100755 backend/tolData/eol/downloadImgs.py
 create mode 100755 backend/tolData/eol/genImagesListDb.sh
 create mode 100755 backend/tolData/eol/reviewImgs.py
 create mode 100755 backend/tolData/genDbpData.py
 create mode 100755 backend/tolData/genEnwikiDescData.py
 create mode 100755 backend/tolData/genEnwikiNameData.py
 create mode 100755 backend/tolData/genEolNameData.py
 create mode 100755 backend/tolData/genImgs.py
 create mode 100755 backend/tolData/genLinkedImgs.py
 create mode 100755 backend/tolData/genOtolData.py
 create mode 100755 backend/tolData/genReducedTrees.py
 create mode 100644 backend/tolData/otol/README.md
 create mode 100644 backend/tolData/pickedImgs/README.md
 create mode 100755 backend/tolData/reviewImgsToGen.py

diff --git a/.gitignore b/.gitignore
index 1708166..1194307 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,30 +4,30 @@
 /public/img/
 
 # Backend files
-/backend/data/data.db
-/backend/data/otol/*.tgz
-/backend/data/otol/*.json
-/backend/data/otol/*.tre
-/backend/data/eol/*.tgz
-/backend/data/eol/*.csv
-/backend/data/eol/imagesList/
-/backend/data/eol/*.db
-/backend/data/eol/imgsForReview/
-/backend/data/eol/imgs/
-/backend/data/dbpedia/*.bz2
-/backend/data/dbpedia/*.db
-/backend/data/enwiki/*.bz2
-/backend/data/enwiki/*.db
-/backend/data/enwiki/imgs/
-/backend/data/imgList.txt
-/backend/data/pickedImgs/
-/backend/data/img/
-/backend/data/pickedOtolNames.txt
-/backend/data/pickedEolIds.txt
-/backend/data/pickedEolAltsToSkip.txt
-/backend/data/pickedEnwikiNamesToSkip.txt
-/backend/data/pickedDbpLabels.txt
-/backend/data/pickedEnwikiLabels.txt
-/backend/data/pickedNodes.txt
-/backend/data/pickedNames.txt
+/backend/tolData/data.db
+/backend/tolData/otol/*.tgz
+/backend/tolData/otol/*.json
+/backend/tolData/otol/*.tre
+/backend/tolData/eol/*.tgz
+/backend/tolData/eol/*.csv
+/backend/tolData/eol/imagesList/
+/backend/tolData/eol/*.db
+/backend/tolData/eol/imgsForReview/
+/backend/tolData/eol/imgs/
+/backend/tolData/dbpedia/*.bz2
+/backend/tolData/dbpedia/*.db
+/backend/tolData/enwiki/*.bz2
+/backend/tolData/enwiki/*.db
+/backend/tolData/enwiki/imgs/
+/backend/tolData/imgList.txt
+/backend/tolData/pickedImgs/
+/backend/tolData/img/
+/backend/tolData/pickedOtolNames.txt
+/backend/tolData/pickedEolIds.txt
+/backend/tolData/pickedEolAltsToSkip.txt
+/backend/tolData/pickedEnwikiNamesToSkip.txt
+/backend/tolData/pickedDbpLabels.txt
+/backend/tolData/pickedEnwikiLabels.txt
+/backend/tolData/pickedNodes.txt
+/backend/tolData/pickedNames.txt
 /backend/__pycache__
diff --git a/backend/data/README.md b/backend/data/README.md
deleted file mode 100644
index ba64114..0000000
--- a/backend/data/README.md
+++ /dev/null
@@ -1,152 +0,0 @@
-This directory holds files used to generate data.db, which contains tree-of-life data.
-
-# Tables
-## Tree Structure data
--   `nodes` <br>
-    Format : `name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT` <br>
-    Represents a tree-of-life node. `tips` represents the number of no-child descendants.
--   `edges` <br>
-    Format: `parent TEXT, child TEXT, p_support INT, PRIMARY KEY (parent, child)` <br>
-    `p_support` is 1 if the edge has 'phylogenetic support', and 0 otherwise
-## Node name data
--   `eol_ids` <br>
-    Format: `id INT PRIMARY KEY, name TEXT` <br>
-    Associates an EOL ID with a node's name.
--   `names` <br>
-    Format: `name TEXT, alt_name TEXT, pref_alt INT, src TEXT, PRIMARY KEY(name, alt_name)` <br>
-    Associates a node with alternative names.
-    `pref_alt` is 1 if the alt-name is the most 'preferred' one.
-    `src` indicates the dataset the alt-name was obtained from (can be 'eol', 'enwiki', or 'picked').
-## Node description data
--   `wiki_ids` <br>
-    Format: `name TEXT PRIMARY KEY, id INT, redirected INT` <br>
-    Associates a node with a wikipedia page ID.
-    `redirected` is 1 if the node was associated with a different page that redirected to this one.
--   `descs` <br>
-    Format: `wiki_id INT PRIMARY KEY, desc TEXT, from_dbp INT` <br>
-    Associates a wikipedia page ID with a short-description.
-    `from_dbp` is 1 if the description was obtained from DBpedia, and 0 otherwise.
-## Node image data
--   `node_imgs` <br>
-    Format: `name TEXT PRIMARY KEY, img_id INT, src TEXT` <br>
-    Associates a node with an image.
--   `images` <br>
-    Format: `id INT, src TEXT, url TEXT, license TEXT, artist TEXT, credit TEXT, PRIMARY KEY (id, src)` <br>
-    Represents an image, identified by a source ('eol', 'enwiki', or 'picked'), and a source-specific ID.
--   `linked_imgs` <br>
-    Format: `name TEXT PRIMARY KEY, otol_ids TEXT` <br>
-    Associates a node with an image from another node.
-    `otol_ids` can be an otol ID, or two comma-separated otol IDs or empty strings.
-        The latter is used for compound nodes.
-## Reduced tree data
--   `nodes_t`, `nodes_i`, `nodes_p` <br>
-    These are like `nodes`, but describe the nodes for various reduced trees.
--   `edges_t`, `edges_i`, `edges_p` <br>
-    Like `edges` but for reduced trees.
-
-# Generating the Database
-
-For the most part, these steps should be done in order.
-
-As a warning, the whole process takes a lot of time and file space. The tree will probably
-have about 2.5 billion nodes. Downloading the images takes several days, and occupies over
-200 GB. And if you want good data, you'll need to do some manual review, which can take weeks.
-
-## Environment
-The scripts are written in python and bash.
-Some of the python scripts require third-party packages:
--   jsonpickle: For encoding class objects as JSON.
--   requests: For downloading data.
--   PIL: For image processing.
--   tkinter: For providing a basic GUI to review images.
--   mwxml, mwparserfromhell: For parsing Wikipedia dumps.
-
-## Generate tree structure data
-1.  Obtain files in otol/, as specified in it's README.
-2.  Run genOtolData.py, which creates data.db, and adds the `nodes` and `edges` tables,
-    using data in otol/. It also uses these files, if they exist:
-    -   pickedOtolNames.txt: Has lines of the form `name1|otolId1`. Some nodes in the
-        tree may have the same name (eg: Pholidota can refer to pangolins or orchids).
-        Normally, such nodes will get the names 'name1', 'name1 [2]', 'name1 [3], etc.
-        This file can be used to manually specify which node should be named 'name1'.
-
-## Generate node name data
-1.  Obtain 'name data files' in eol/, as specified in it's README.
-2.  Run genEolNameData.py, which adds the `names` and `eol_ids` tables, using data in
-    eol/ and the `nodes` table. It also uses these files, if they exist:
-    -   pickedEolIds.txt: Has lines of the form `nodeName1|eolId1` or `nodeName1|`.
-        Specifies node names that should have a particular EOL ID, or no ID.
-        Quite a few taxons have ambiguous names, and may need manual correction.
-        For example, Viola may resolve to a taxon of butterflies or of plants.
-    -   pickedEolAltsToSkip.txt: Has lines of the form `nodeName1|altName1`.
-        Specifies that a node's alt-name set should exclude altName1.
-
-## Generate node description data
-### Get data from DBpedia
-1.  Obtain files in dbpedia/, as specified in it's README.
-2.  Run genDbpData.py, which adds the `wiki_ids` and `descs` tables, using data in
-    dbpedia/ and the `nodes` table. It also uses these files, if they exist:
-    -   pickedEnwikiNamesToSkip.txt: Each line holds the name of a node for which
-        no description should be obtained. Many node names have a same-name
-        wikipedia page that describes something different (eg: Osiris).
-    -   pickedDbpLabels.txt: Has lines of the form `nodeName1|label1`.
-        Specifies node names that should have a particular associated page label.
-### Get data from Wikipedia
-1.  Obtain 'description database files' in enwiki/, as specified in it's README.
-2.  Run genEnwikiDescData.py, which adds to the `wiki_ids` and `descs` tables,
-    using data in enwiki/ and the `nodes` table.
-    It also uses these files, if they exist:
-    -   pickedEnwikiNamesToSkip.txt: Same as with genDbpData.py.
-    -   pickedEnwikiLabels.txt: Similar to pickedDbpLabels.txt.
-
-## Generate node image data
-### Get images from EOL
-1.  Obtain 'image metadata files' in eol/, as specified in it's README.
-2.  In eol/, run downloadImgs.py, which downloads images (possibly multiple per node),
-    into eol/imgsForReview, using data in eol/, as well as the `eol_ids` table.
-3.  In eol/, run reviewImgs.py, which interactively displays the downloaded images for
-    each node, providing the choice of which to use, moving them to eol/imgs/.
-    Uses `names` and `eol_ids` to display extra info.
-### Get images from Wikipedia
-1.  In enwiki/, run genImgData.py, which looks for wikipedia image names for each node,
-    using the `wiki_ids` table, and stores them in a database.
-2.  In enwiki/, run downloadImgLicenseInfo.py, which downloads licensing information for
-    those images, using wikipedia's online API.
-3.  In enwiki/, run downloadImgs.py, which downloads 'permissively-licensed'
-    images into enwiki/imgs/.
-### Merge the image sets
-1.  Run reviewImgsToGen.py, which displays images from eol/imgs/ and enwiki/imgs/,
-    and enables choosing, for each node, which image should be used, if any,
-    and outputs choice information into imgList.txt. Uses the `nodes`,
-    `eol_ids`, and `wiki_ids` tables (as well as `names` to display extra info).
-2.  Run genImgs.py, which creates cropped/resized images in img/, from files listed in
-    imgList.txt and located in eol/ and enwiki/, and creates the `node_imgs` and
-    `images` tables. If pickedImgs/ is present, images within it are also used. <br>
-    The outputs might need to be manually created/adjusted:
-    -   An input image might have no output produced, possibly due to
-        data incompatibilities, memory limits, etc. A few input image files
-        might actually be html files, containing a 'file not found' page.
-    -   An input x.gif might produce x-1.jpg, x-2.jpg, etc, instead of x.jpg.
-    -   An input image might produce output with unexpected dimensions.
-        This seems to happen when the image is very large, and triggers a
-        decompression bomb warning.
-    The result might have as many as 150k images, with about 2/3 of them
-    being from wikipedia.
-### Add more image associations
-1.  Run genLinkedImgs.py, which tries to associate nodes without images to
-    images of it's children. Adds the `linked_imgs` table, and uses the
-    `nodes`, `edges`, and `node_imgs` tables.
-
-## Do some post-processing
-1.  Run genEnwikiNameData.py, which adds more entries to the `names` table,
-    using data in enwiki/, and the `names` and `wiki_ids` tables.
-2.  Optionally run addPickedNames.py, which allows adding manually-selected name data to
-    the `names` table, as specified in pickedNames.txt.
-    -   pickedNames.txt: Has lines of the form `nodeName1|altName1|prefAlt1`.
-        These correspond to entries in the `names` table. `prefAlt` should be 1 or 0.
-        A line like `name1|name1|1` causes a node to have no preferred alt-name.
-3.  Run genReducedTrees.py, which generates multiple reduced versions of the tree,
-    adding the `nodes_*` and `edges_*` tables, using `nodes` and `names`. Reads from
-    pickedNodes.txt, which lists names of nodes that must be included (1 per line).
-    The original tree isn't used for web-queries, as some nodes would have over 
-    10k children, which can take a while to render (took over a minute in testing).
diff --git a/backend/data/addPickedNames.py b/backend/data/addPickedNames.py
deleted file mode 100755
index d56a0cb..0000000
--- a/backend/data/addPickedNames.py
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/python3
-
-import sys
-import sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Reads alt-name data from a file, and adds it to the database's 'names' table.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-dbFile = "data.db"
-pickedNamesFile = "pickedNames.txt"
-
-print("Opening database")
-dbCon = sqlite3.connect(dbFile)
-dbCur = dbCon.cursor()
-
-print("Iterating through picked-names file")
-with open(pickedNamesFile) as file:
-	for line in file:
-		# Get record data
-		nodeName, altName, prefAlt = line.lower().rstrip().split("|")
-		prefAlt = int(prefAlt)
-		# Check whether there exists a node with the name
-		row = dbCur.execute("SELECT name from nodes where name = ?", (nodeName,)).fetchone()
-		if row == None:
-			print(f"ERROR: No node with name \"{nodeName}\" exists")
-			break
-		# Remove any existing preferred-alt status
-		if prefAlt == 1:
-			query = "SELECT name, alt_name FROM names WHERE name = ? AND pref_alt = 1"
-			row = dbCur.execute(query, (nodeName,)).fetchone()
-			if row != None and row[1] != altName:
-				print(f"Removing pref-alt status from alt-name {row[1]} for {nodeName}")
-				dbCur.execute("UPDATE names SET pref_alt = 0 WHERE name = ? AND alt_name = ?", row)
-		# Check for an existing record
-		if nodeName == altName:
-			continue
-		query = "SELECT name, alt_name, pref_alt FROM names WHERE name = ? AND alt_name = ?"
-		row = dbCur.execute(query, (nodeName, altName)).fetchone()
-		if row == None:
-			print(f"Adding record for alt-name {altName} for {nodeName}")
-			dbCur.execute("INSERT INTO names VALUES (?, ?, ?, 'picked')", (nodeName, altName, prefAlt))
-		else:
-			# Update existing record
-			if row[2] != prefAlt:
-				print(f"Updating record for alt-name {altName} for {nodeName}")
-				dbCur.execute("UPDATE names SET pref_alt = ?, src = 'picked' WHERE name = ? AND alt_name = ?",
-					(prefAlt, nodeName, altName))
-
-print("Closing database")
-dbCon.commit()
-dbCon.close()
diff --git a/backend/data/dbpedia/README.md b/backend/data/dbpedia/README.md
deleted file mode 100644
index 8a08f20..0000000
--- a/backend/data/dbpedia/README.md
+++ /dev/null
@@ -1,29 +0,0 @@
-This directory holds files obtained from/using [Dbpedia](https://www.dbpedia.org).
-
-# Downloaded Files
--   `labels_lang=en.ttl.bz2` <br>
-    Obtained via https://databus.dbpedia.org/dbpedia/collections/latest-core.
-    Downloaded from <https://databus.dbpedia.org/dbpedia/generic/labels/2022.03.01/labels_lang=en.ttl.bz2>.
--   `page_lang=en_ids.ttl.bz2` <br>
-    Downloaded from <https://databus.dbpedia.org/dbpedia/generic/page/2022.03.01/page_lang=en_ids.ttl.bz2>
--   `redirects_lang=en_transitive.ttl.bz2` <br>
-    Downloaded from <https://databus.dbpedia.org/dbpedia/generic/redirects/2022.03.01/redirects_lang=en_transitive.ttl.bz2>.
--   `disambiguations_lang=en.ttl.bz2` <br>
-    Downloaded from <https://databus.dbpedia.org/dbpedia/generic/disambiguations/2022.03.01/disambiguations_lang=en.ttl.bz2>.
--   `instance-types_lang=en_specific.ttl.bz2` <br>
-    Downloaded from <https://databus.dbpedia.org/dbpedia/mappings/instance-types/2022.03.01/instance-types_lang=en_specific.ttl.bz2>.
--   `short-abstracts_lang=en.ttl.bz2` <br>
-    Downloaded from <https://databus.dbpedia.org/vehnem/text/short-abstracts/2021.05.01/short-abstracts_lang=en.ttl.bz2>.
-
-# Other Files
--   genDescData.py <br>
-    Used to generate a database representing data from the ttl files.
--   descData.db <br>
-    Generated by genDescData.py. <br>
-    Tables: <br>
-    -   `labels`:          `iri TEXT PRIMARY KEY, label TEXT `
-    -   `ids`:             `iri TEXT PRIMARY KEY, id INT`
-    -   `redirects`:       `iri TEXT PRIMARY KEY, target TEXT`
-    -   `disambiguations`: `iri TEXT PRIMARY KEY`
-    -   `types`:           `iri TEXT, type TEXT`
-    -   `abstracts`:       `iri TEXT PRIMARY KEY, abstract TEXT`
diff --git a/backend/data/dbpedia/genDescData.py b/backend/data/dbpedia/genDescData.py
deleted file mode 100755
index d9e8a80..0000000
--- a/backend/data/dbpedia/genDescData.py
+++ /dev/null
@@ -1,130 +0,0 @@
-#!/usr/bin/python3
-
-import sys, re
-import bz2, sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Adds DBpedia labels/types/abstracts/etc data into a database.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-labelsFile = "labels_lang=en.ttl.bz2" # Had about 16e6 entries
-idsFile = "page_lang=en_ids.ttl.bz2"
-redirectsFile = "redirects_lang=en_transitive.ttl.bz2"
-disambigFile = "disambiguations_lang=en.ttl.bz2"
-typesFile = "instance-types_lang=en_specific.ttl.bz2"
-abstractsFile = "short-abstracts_lang=en.ttl.bz2"
-dbFile = "descData.db"
-# In testing, this script took a few hours to run, and generated about 10GB
-
-print("Creating database")
-dbCon = sqlite3.connect(dbFile)
-dbCur = dbCon.cursor()
-
-print("Reading/storing label data")
-dbCur.execute("CREATE TABLE labels (iri TEXT PRIMARY KEY, label TEXT)")
-dbCur.execute("CREATE INDEX labels_idx ON labels(label)")
-dbCur.execute("CREATE INDEX labels_idx_nc ON labels(label COLLATE NOCASE)")
-labelLineRegex = re.compile(r'<([^>]+)> <[^>]+> "((?:[^"]|\\")+)"@en \.\n')
-lineNum = 0
-with bz2.open(labelsFile, mode='rt') as file:
-	for line in file:
-		lineNum += 1
-		if lineNum % 1e5 == 0:
-			print(f"At line {lineNum}")
-		#
-		match = labelLineRegex.fullmatch(line)
-		if match == None:
-			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
-		dbCur.execute("INSERT INTO labels VALUES (?, ?)", (match.group(1), match.group(2)))
-
-print("Reading/storing wiki page ids")
-dbCur.execute("CREATE TABLE ids (iri TEXT PRIMARY KEY, id INT)")
-idLineRegex = re.compile(r'<([^>]+)> <[^>]+> "(\d+)".*\n')
-lineNum = 0
-with bz2.open(idsFile, mode='rt') as file:
-	for line in file:
-		lineNum += 1
-		if lineNum % 1e5 == 0:
-			print(f"At line {lineNum}")
-		#
-		match = idLineRegex.fullmatch(line)
-		if match == None:
-			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
-		try:
-			dbCur.execute("INSERT INTO ids VALUES (?, ?)", (match.group(1), int(match.group(2))))
-		except sqlite3.IntegrityError as e:
-			# Accounts for certain lines that have the same IRI
-			print(f"WARNING: Failed to add entry with IRI \"{match.group(1)}\": {e}")
-
-print("Reading/storing redirection data")
-dbCur.execute("CREATE TABLE redirects (iri TEXT PRIMARY KEY, target TEXT)")
-redirLineRegex = re.compile(r'<([^>]+)> <[^>]+> <([^>]+)> \.\n')
-lineNum = 0
-with bz2.open(redirectsFile, mode='rt') as file:
-	for line in file:
-		lineNum += 1
-		if lineNum % 1e5 == 0:
-			print(f"At line {lineNum}")
-		#
-		match = redirLineRegex.fullmatch(line)
-		if match == None:
-			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
-		dbCur.execute("INSERT INTO redirects VALUES (?, ?)", (match.group(1), match.group(2)))
-
-print("Reading/storing diambiguation-page data")
-dbCur.execute("CREATE TABLE disambiguations (iri TEXT PRIMARY KEY)")
-disambigLineRegex = redirLineRegex
-lineNum = 0
-with bz2.open(disambigFile, mode='rt') as file:
-	for line in file:
-		lineNum += 1
-		if lineNum % 1e5 == 0:
-			print(f"At line {lineNum}")
-		#
-		match = disambigLineRegex.fullmatch(line)
-		if match == None:
-			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
-		dbCur.execute("INSERT OR IGNORE INTO disambiguations VALUES (?)", (match.group(1),))
-
-print("Reading/storing instance-type data")
-dbCur.execute("CREATE TABLE types (iri TEXT, type TEXT)")
-dbCur.execute("CREATE INDEX types_iri_idx ON types(iri)")
-typeLineRegex = redirLineRegex
-lineNum = 0
-with bz2.open(typesFile, mode='rt') as file:
-	for line in file:
-		lineNum += 1
-		if lineNum % 1e5 == 0:
-			print(f"At line {lineNum}")
-		#
-		match = typeLineRegex.fullmatch(line)
-		if match == None:
-			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
-		dbCur.execute("INSERT INTO types VALUES (?, ?)", (match.group(1), match.group(2)))
-
-print("Reading/storing abstracts")
-dbCur.execute("CREATE TABLE abstracts (iri TEXT PRIMARY KEY, abstract TEXT)")
-descLineRegex = labelLineRegex
-lineNum = 0
-with bz2.open(abstractsFile, mode='rt') as file:
-	for line in file:
-		lineNum += 1
-		if lineNum % 1e5 == 0:
-			print(f"At line {lineNum}")
-		#
-		if line[0] == "#":
-			continue
-		match = descLineRegex.fullmatch(line)
-		if match == None:
-			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
-		dbCur.execute("INSERT INTO abstracts VALUES (?, ?)",
-			(match.group(1), match.group(2).replace(r'\"', '"')))
-
-print("Closing database")
-dbCon.commit()
-dbCon.close()
diff --git a/backend/data/enwiki/README.md b/backend/data/enwiki/README.md
deleted file mode 100644
index 90d16c7..0000000
--- a/backend/data/enwiki/README.md
+++ /dev/null
@@ -1,52 +0,0 @@
-This directory holds files obtained from/using [English Wikipedia](https://en.wikipedia.org/wiki/Main_Page).
-
-# Downloaded Files
--   enwiki-20220501-pages-articles-multistream.xml.bz2 <br>
-    Obtained via <https://dumps.wikimedia.org/backup-index.html> (site suggests downloading from a mirror).
-    Contains text content and metadata for pages in enwiki.
-    Some file content and format information was available from
-        <https://meta.wikimedia.org/wiki/Data_dumps/What%27s_available_for_download>.
--   enwiki-20220501-pages-articles-multistream-index.txt.bz2 <br>
-    Obtained like above. Holds lines of the form offset1:pageId1:title1,
-    providing, for each page, an offset into the dump file of a chunk of
-    100 pages that includes it.
-
-# Generated Dump-Index Files
--   genDumpIndexDb.py <br>
-    Creates an sqlite-database version of the enwiki-dump index file.
--   dumpIndex.db <br>
-    Generated by genDumpIndexDb.py. <br>
-    Tables: <br>
-    -   `offsets`: `title TEXT PRIMARY KEY, id INT UNIQUE, offset INT, next_offset INT`
-
-# Description Database Files
--   genDescData.py <br>
-    Reads through pages in the dump file, and adds short-description info to a database.
--   descData.db <br>
-    Generated by genDescData.py. <br>
-    Tables: <br>
-    -   `pages`:     `id INT PRIMARY KEY, title TEXT UNIQUE`
-    -   `redirects`: `id INT PRIMARY KEY, target TEXT`
-    -   `descs`:     `id INT PRIMARY KEY, desc TEXT`
-
-# Image Database Files
--   genImgData.py <br>
-    Used to find infobox image names for page IDs, storing them into a database.
--   downloadImgLicenseInfo.py <br>
-    Used to download licensing metadata for image names, via wikipedia's online API, storing them into a database.
--   imgData.db <br>
-    Used to hold metadata about infobox images for a set of pageIDs.
-    Generated using getEnwikiImgData.py and downloadImgLicenseInfo.py. <br>
-    Tables: <br>
-    -   `page_imgs`: `page_id INT PRIMAY KEY, img_name TEXT` <br>
-        `img_name` may be null, which means 'none found', and is used to avoid re-processing page-ids.
-    -   `imgs`: `name TEXT PRIMARY KEY, license TEXT, artist TEXT, credit TEXT, restrictions TEXT, url TEXT` <br>
-        Might lack some matches for `img_name` in `page_imgs`, due to licensing info unavailability.
--   downloadImgs.py <br>
-    Used to download image files into imgs/.
-
-# Other Files
--   lookupPage.py <br>
-    Running `lookupPage.py title1` looks in the dump for a page with a given title,
-    and prints the contents to stdout. Uses dumpIndex.db.
-
diff --git a/backend/data/enwiki/downloadImgLicenseInfo.py b/backend/data/enwiki/downloadImgLicenseInfo.py
deleted file mode 100755
index 399922e..0000000
--- a/backend/data/enwiki/downloadImgLicenseInfo.py
+++ /dev/null
@@ -1,150 +0,0 @@
-#!/usr/bin/python3
-
-import sys, re
-import sqlite3, urllib.parse, html
-import requests
-import time, signal
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Reads image names from a database, and uses enwiki's online API to obtain
-licensing information for them, adding the info to the database.
-
-SIGINT causes the program to finish an ongoing download and exit.
-The program can be re-run to continue downloading, and looks
-at already-processed names to decide what to skip.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-imgDb = "imgData.db"
-apiUrl = "https://en.wikipedia.org/w/api.php"
-userAgent = "terryt.dev (terry06890@gmail.com)"
-batchSz = 50 # Max 50
-tagRegex = re.compile(r"<[^<]+>")
-whitespaceRegex = re.compile(r"\s+")
-
-print("Opening database")
-dbCon = sqlite3.connect(imgDb)
-dbCur = dbCon.cursor()
-dbCur2 = dbCon.cursor()
-print("Checking for table")
-if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='imgs'").fetchone() == None:
-	dbCur.execute("CREATE TABLE imgs(" \
-		"name TEXT PRIMARY KEY, license TEXT, artist TEXT, credit TEXT, restrictions TEXT, url TEXT)")
-
-print("Reading image names")
-imgNames = set()
-for (imgName,) in dbCur.execute("SELECT DISTINCT img_name FROM page_imgs WHERE img_name NOT NULL"):
-	imgNames.add(imgName)
-print(f"Found {len(imgNames)}")
-
-print("Checking for already-processed images")
-oldSz = len(imgNames)
-for (imgName,) in dbCur.execute("SELECT name FROM imgs"):
-	imgNames.discard(imgName)
-print(f"Found {oldSz - len(imgNames)}")
-
-# Set SIGINT handler
-interrupted = False
-oldHandler = None
-def onSigint(sig, frame):
-	global interrupted
-	interrupted = True
-	signal.signal(signal.SIGINT, oldHandler)
-oldHandler = signal.signal(signal.SIGINT, onSigint)
-
-print("Iterating through image names")
-imgNames = list(imgNames)
-iterNum = 0
-for i in range(0, len(imgNames), batchSz):
-	iterNum += 1
-	if iterNum % 1 == 0:
-		print(f"At iteration {iterNum} (after {(iterNum - 1) * batchSz} images)")
-	if interrupted:
-		print(f"Exiting loop at iteration {iterNum}")
-		break
-	# Get batch
-	imgBatch = imgNames[i:i+batchSz]
-	imgBatch = ["File:" + x for x in imgBatch]
-	# Make request
-	headers = {
-		"user-agent": userAgent,
-		"accept-encoding": "gzip",
-	}
-	params = {
-		"action": "query",
-		"format": "json",
-		"prop": "imageinfo",
-		"iiprop": "extmetadata|url",
-		"maxlag": "5",
-		"titles": "|".join(imgBatch),
-		"iiextmetadatafilter": "Artist|Credit|LicenseShortName|Restrictions",
-	}
-	responseObj = None
-	try:
-		response = requests.get(apiUrl, params=params, headers=headers)
-		responseObj = response.json()
-	except Exception as e:
-		print(f"ERROR: Exception while downloading info: {e}")
-		print(f"\tImage batch: " + "|".join(imgBatch))
-		continue
-	# Parse response-object
-	if "query" not in responseObj or "pages" not in responseObj["query"]:
-		print("WARNING: Response object for doesn't have page data")
-		print("\tImage batch: " + "|".join(imgBatch))
-		if "error" in responseObj:
-			errorCode = responseObj["error"]["code"]
-			print(f"\tError code: {errorCode}")
-			if errorCode == "maxlag":
-				time.sleep(5)
-		continue
-	pages = responseObj["query"]["pages"]
-	normalisedToInput = {}
-	if "normalized" in responseObj["query"]:
-		for entry in responseObj["query"]["normalized"]:
-			normalisedToInput[entry["to"]] = entry["from"]
-	for (_, page) in pages.items():
-		# Some fields // More info at https://www.mediawiki.org/wiki/Extension:CommonsMetadata#Returned_data
-			# LicenseShortName: short human-readable license name, apparently more reliable than 'License',
-			# Artist: author name (might contain complex html, multiple authors, etc)
-			# Credit: 'source'
-				# For image-map-like images, can be quite large/complex html, creditng each sub-image
-				# May be <a href="text1">text2</a>, where the text2 might be non-indicative
-			# Restrictions: specifies non-copyright legal restrictions
-		title = page["title"]
-		if title in normalisedToInput:
-			title = normalisedToInput[title]
-		title = title[5:] # Remove 'File:'
-		if title not in imgNames:
-			print(f"WARNING: Got title \"{title}\" not in image-name list")
-			continue
-		if "imageinfo" not in page:
-			print(f"WARNING: No imageinfo section for page \"{title}\"")
-			continue
-		metadata = page["imageinfo"][0]["extmetadata"]
-		url = page["imageinfo"][0]["url"]
-		license = metadata['LicenseShortName']['value'] if 'LicenseShortName' in metadata else None
-		artist = metadata['Artist']['value'] if 'Artist' in metadata else None
-		credit = metadata['Credit']['value'] if 'Credit' in metadata else None
-		restrictions = metadata['Restrictions']['value'] if 'Restrictions' in metadata else None
-		# Remove markup
-		if artist != None:
-			artist = tagRegex.sub(" ", artist)
-			artist = whitespaceRegex.sub(" ", artist)
-			artist = html.unescape(artist)
-			artist = urllib.parse.unquote(artist)
-		if credit != None:
-			credit = tagRegex.sub(" ", credit)
-			credit = whitespaceRegex.sub(" ", credit)
-			credit = html.unescape(credit)
-			credit = urllib.parse.unquote(credit)
-		# Add to db
-		dbCur2.execute("INSERT INTO imgs VALUES (?, ?, ?, ?, ?, ?)",
-			(title, license, artist, credit, restrictions, url))
-
-print("Closing database")
-dbCon.commit()
-dbCon.close()
diff --git a/backend/data/enwiki/downloadImgs.py b/backend/data/enwiki/downloadImgs.py
deleted file mode 100755
index 8fb605f..0000000
--- a/backend/data/enwiki/downloadImgs.py
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/usr/bin/python3
-
-import sys, re, os
-import sqlite3
-import urllib.parse, requests
-import time, signal
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Downloads images from URLs in an image database, into an output directory,
-with names of the form 'pageId1.ext1'.
-
-SIGINT causes the program to finish an ongoing download and exit.
-The program can be re-run to continue downloading, and looks
-in the output directory do decide what to skip.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-imgDb = "imgData.db" # About 130k image names
-outDir = "imgs"
-licenseRegex = re.compile(r"cc0|cc([ -]by)?([ -]sa)?([ -][1234]\.[05])?( \w\w\w?)?", flags=re.IGNORECASE)
-# In testing, this downloaded about 100k images, over several days
-
-if not os.path.exists(outDir):
-	os.mkdir(outDir)
-print("Checking for already-downloaded images")
-fileList = os.listdir(outDir)
-pageIdsDone = set()
-for filename in fileList:
-	(basename, extension) = os.path.splitext(filename)
-	pageIdsDone.add(int(basename))
-print(f"Found {len(pageIdsDone)}")
-
-# Set SIGINT handler
-interrupted = False
-oldHandler = None
-def onSigint(sig, frame):
-	global interrupted
-	interrupted = True
-	signal.signal(signal.SIGINT, oldHandler)
-oldHandler = signal.signal(signal.SIGINT, onSigint)
-
-print("Opening database")
-dbCon = sqlite3.connect(imgDb)
-dbCur = dbCon.cursor()
-print("Starting downloads")
-iterNum = 0
-query = "SELECT page_id, license, artist, credit, restrictions, url FROM" \
-	" imgs INNER JOIN page_imgs ON imgs.name = page_imgs.img_name"
-for (pageId, license, artist, credit, restrictions, url) in dbCur.execute(query):
-	if pageId in pageIdsDone:
-		continue
-	if interrupted:
-		print(f"Exiting loop")
-		break
-	# Check for problematic attributes
-	if license == None or licenseRegex.fullmatch(license) == None:
-		continue
-	if artist == None or artist == "" or len(artist) > 100 or re.match(r"(\d\. )?File:", artist) != None:
-		continue
-	if credit == None or len(credit) > 300 or re.match(r"File:", credit) != None:
-		continue
-	if restrictions != None and restrictions != "":
-		continue
-	# Download image
-	iterNum += 1
-	print(f"Iteration {iterNum}: Downloading for page-id {pageId}")
-	urlParts = urllib.parse.urlparse(url)
-	extension = os.path.splitext(urlParts.path)[1]
-	if len(extension) <= 1:
-		print(f"WARNING: No filename extension found in URL {url}")
-		sys.exit(1)
-	outFile = f"{outDir}/{pageId}{extension}"
-	headers = {
-		"user-agent": "terryt.dev (terry06890@gmail.com)",
-		"accept-encoding": "gzip",
-	}
-	try:
-		response = requests.get(url, headers=headers)
-		with open(outFile, 'wb') as file:
-			file.write(response.content)
-		time.sleep(1)
-			# https://en.wikipedia.org/wiki/Wikipedia:Database_download says to "throttle self to 1 cache miss per sec"
-			# It's unclear how to properly check for cache misses, so this just aims for 1 per sec
-	except Exception as e:
-		print(f"Error while downloading to {outFile}: {e}")
-print("Closing database")
-dbCon.close()
diff --git a/backend/data/enwiki/genDescData.py b/backend/data/enwiki/genDescData.py
deleted file mode 100755
index b0ca272..0000000
--- a/backend/data/enwiki/genDescData.py
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/usr/bin/python3
-
-import sys, os, re
-import bz2
-import html, mwxml, mwparserfromhell
-import sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Reads through the wiki dump, and attempts to
-parse short-descriptions, and add them to a database.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2" # Had about 22e6 pages
-enwikiDb = "descData.db"
-# In testing, this script took over 10 hours to run, and generated about 5GB
-
-descLineRegex = re.compile("^ *[A-Z'\"]")
-embeddedHtmlRegex = re.compile(r"<[^<]+/>|<!--[^<]+-->|<[^</]+>([^<]*|[^<]*<[^<]+>[^<]*)</[^<]+>|<[^<]+$")
-	# Recognises a self-closing HTML tag, a tag with 0 children, tag with 1 child with 0 children, or unclosed tag
-convertTemplateRegex = re.compile(r"{{convert\|(\d[^|]*)\|(?:(to|-)\|(\d[^|]*)\|)?([a-z][^|}]*)[^}]*}}")
-def convertTemplateReplace(match):
-	if match.group(2) == None:
-		return f"{match.group(1)} {match.group(4)}"
-	else:
-		return f"{match.group(1)} {match.group(2)} {match.group(3)} {match.group(4)}"
-parensGroupRegex = re.compile(r" \([^()]*\)")
-leftoverBraceRegex = re.compile(r"(?:{\||{{).*")
-
-def parseDesc(text):
-	# Find first matching line outside {{...}}, [[...]], and block-html-comment constructs,
-		# and then accumulate lines until a blank one.
-	# Some cases not accounted for include: disambiguation pages, abstracts with sentences split-across-lines, 
-		# nested embedded html, 'content significant' embedded-html, markup not removable with mwparsefromhell, 
-	lines = []
-	openBraceCount = 0
-	openBracketCount = 0
-	inComment = False
-	skip = False
-	for line in text.splitlines():
-		line = line.strip()
-		if len(lines) == 0:
-			if len(line) > 0:
-				if openBraceCount > 0 or line[0] == "{":
-					openBraceCount += line.count("{")
-					openBraceCount -= line.count("}")
-					skip = True
-				if openBracketCount > 0 or line[0] == "[":
-					openBracketCount += line.count("[")
-					openBracketCount -= line.count("]")
-					skip = True
-				if inComment or line.find("<!--") != -1:
-					if line.find("-->") != -1:
-						if inComment:
-							inComment = False
-							skip = True
-					else:
-						inComment = True
-						skip = True
-				if skip:
-					skip = False
-					continue
-				if line[-1] == ":": # Seems to help avoid disambiguation pages
-					return None
-				if descLineRegex.match(line) != None:
-					lines.append(line)
-		else:
-			if len(line) == 0:
-				return removeMarkup(" ".join(lines))
-			lines.append(line)
-	if len(lines) > 0:
-		return removeMarkup(" ".join(lines))
-	return None
-def removeMarkup(content):
-	content = embeddedHtmlRegex.sub("", content)
-	content = convertTemplateRegex.sub(convertTemplateReplace, content)
-	content = mwparserfromhell.parse(content).strip_code() # Remove wikitext markup
-	content = parensGroupRegex.sub("", content)
-	content = leftoverBraceRegex.sub("", content)
-	return content
-def convertTitle(title):
-	return html.unescape(title).replace("_", " ")
-
-print("Creating database")
-if os.path.exists(enwikiDb):
-	raise Exception(f"ERROR: Existing {enwikiDb}")
-dbCon = sqlite3.connect(enwikiDb)
-dbCur = dbCon.cursor()
-dbCur.execute("CREATE TABLE pages (id INT PRIMARY KEY, title TEXT UNIQUE)")
-dbCur.execute("CREATE INDEX pages_title_idx ON pages(title COLLATE NOCASE)")
-dbCur.execute("CREATE TABLE redirects (id INT PRIMARY KEY, target TEXT)")
-dbCur.execute("CREATE INDEX redirects_idx ON redirects(target)")
-dbCur.execute("CREATE TABLE descs (id INT PRIMARY KEY, desc TEXT)")
-
-print("Iterating through dump file")
-with bz2.open(dumpFile, mode='rt') as file:
-	dump = mwxml.Dump.from_file(file)
-	pageNum = 0
-	for page in dump:
-		pageNum += 1
-		if pageNum % 1e4 == 0:
-			print(f"At page {pageNum}")
-		if pageNum > 3e4:
-			break
-		# Parse page
-		if page.namespace == 0:
-			try:
-				dbCur.execute("INSERT INTO pages VALUES (?, ?)", (page.id, convertTitle(page.title)))
-			except sqlite3.IntegrityError as e:
-				# Accounts for certain pages that have the same title
-				print(f"Failed to add page with title \"{page.title}\": {e}", file=sys.stderr)
-				continue
-			if page.redirect != None:
-				dbCur.execute("INSERT INTO redirects VALUES (?, ?)", (page.id, convertTitle(page.redirect)))
-			else:
-				revision = next(page)
-				desc = parseDesc(revision.text)
-				if desc != None:
-					dbCur.execute("INSERT INTO descs VALUES (?, ?)", (page.id, desc))
-
-print("Closing database")
-dbCon.commit()
-dbCon.close()
diff --git a/backend/data/enwiki/genDumpIndexDb.py b/backend/data/enwiki/genDumpIndexDb.py
deleted file mode 100755
index 3955885..0000000
--- a/backend/data/enwiki/genDumpIndexDb.py
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/python3
-
-import sys, os, re
-import bz2
-import sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Adds data from the wiki dump index-file into a database.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-indexFile = "enwiki-20220501-pages-articles-multistream-index.txt.bz2" # Had about 22e6 lines
-indexDb = "dumpIndex.db"
-
-if os.path.exists(indexDb):
-	raise Exception(f"ERROR: Existing {indexDb}")
-print("Creating database")
-dbCon = sqlite3.connect(indexDb)
-dbCur = dbCon.cursor()
-dbCur.execute("CREATE TABLE offsets (title TEXT PRIMARY KEY, id INT UNIQUE, offset INT, next_offset INT)")
-
-print("Iterating through index file")
-lineRegex = re.compile(r"([^:]+):([^:]+):(.*)")
-lastOffset = 0
-lineNum = 0
-entriesToAdd = []
-with bz2.open(indexFile, mode='rt') as file:
-	for line in file:
-		lineNum += 1
-		if lineNum % 1e5 == 0:
-			print(f"At line {lineNum}")
-		#
-		match = lineRegex.fullmatch(line.rstrip())
-		(offset, pageId, title) = match.group(1,2,3)
-		offset = int(offset)
-		if offset > lastOffset:
-			for (t, p) in entriesToAdd:
-				try:
-					dbCur.execute("INSERT INTO offsets VALUES (?, ?, ?, ?)", (t, p, lastOffset, offset))
-				except sqlite3.IntegrityError as e:
-					# Accounts for certain entries in the file that have the same title
-					print(f"Failed on title \"{t}\": {e}", file=sys.stderr)
-			entriesToAdd = []
-			lastOffset = offset
-		entriesToAdd.append([title, pageId])
-for (title, pageId) in entriesToAdd:
-	try:
-		dbCur.execute("INSERT INTO offsets VALUES (?, ?, ?, ?)", (title, pageId, lastOffset, -1))
-	except sqlite3.IntegrityError as e:
-		print(f"Failed on title \"{t}\": {e}", file=sys.stderr)
-
-print("Closing database")
-dbCon.commit()
-dbCon.close()
diff --git a/backend/data/enwiki/genImgData.py b/backend/data/enwiki/genImgData.py
deleted file mode 100755
index dedfe14..0000000
--- a/backend/data/enwiki/genImgData.py
+++ /dev/null
@@ -1,190 +0,0 @@
-#!/usr/bin/python3
-
-import sys, re
-import bz2, html, urllib.parse
-import sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-For some set of page IDs, looks up their content in the wiki dump,
-and tries to parse infobox image names, storing them into a database.
-
-The program can be re-run with an updated set of page IDs, and
-will skip already-processed page IDs.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-def getInputPageIds():
-	pageIds = set()
-	dbCon = sqlite3.connect("../data.db")
-	dbCur = dbCon.cursor()
-	for (pageId,) in dbCur.execute("SELECT id from wiki_ids"):
-		pageIds.add(pageId)
-	dbCon.close()
-	return pageIds
-dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2"
-indexDb = "dumpIndex.db"
-imgDb = "imgData.db" # The database to create
-idLineRegex = re.compile(r"<id>(.*)</id>")
-imageLineRegex = re.compile(r".*\| *image *= *([^|]*)")
-bracketImageRegex = re.compile(r"\[\[(File:[^|]*).*]]")
-imageNameRegex = re.compile(r".*\.(jpg|jpeg|png|gif|tiff|tif)", flags=re.IGNORECASE)
-cssImgCropRegex = re.compile(r"{{css image crop\|image *= *(.*)", flags=re.IGNORECASE)
-# In testing, got about 360k image names
-
-print("Getting input page-ids")
-pageIds = getInputPageIds()
-print(f"Found {len(pageIds)}")
-
-print("Opening databases")
-indexDbCon = sqlite3.connect(indexDb)
-indexDbCur = indexDbCon.cursor()
-imgDbCon = sqlite3.connect(imgDb)
-imgDbCur = imgDbCon.cursor()
-print("Checking tables")
-if imgDbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='page_imgs'").fetchone() == None:
-	# Create tables if not present
-	imgDbCur.execute("CREATE TABLE page_imgs (page_id INT PRIMARY KEY, img_name TEXT)") # img_name may be NULL
-	imgDbCur.execute("CREATE INDEX page_imgs_idx ON page_imgs(img_name)")
-else:
-	# Check for already-processed page IDs
-	numSkipped = 0
-	for (pid,) in imgDbCur.execute("SELECT page_id FROM page_imgs"):
-		if pid in pageIds:
-			pageIds.remove(pid)
-			numSkipped += 1
-		else:
-			print(f"WARNING: Found already-processed page ID {pid} which was not in input set")
-	print(f"Will skip {numSkipped} already-processed page IDs")
-
-print("Getting dump-file offsets")
-offsetToPageids = {}
-offsetToEnd = {} # Maps chunk-start offsets to their chunk-end offsets
-iterNum = 0
-for pageId in pageIds:
-	iterNum += 1
-	if iterNum % 1e4 == 0:
-		print(f"At iteration {iterNum}")
-	#
-	query = "SELECT offset, next_offset FROM offsets WHERE id = ?"
-	row = indexDbCur.execute(query, (pageId,)).fetchone()
-	if row == None:
-		print(f"WARNING: Page ID {pageId} not found")
-		continue
-	(chunkOffset, endOffset) = row
-	offsetToEnd[chunkOffset] = endOffset
-	if chunkOffset not in offsetToPageids:
-		offsetToPageids[chunkOffset] = []
-	offsetToPageids[chunkOffset].append(pageId)
-print(f"Found {len(offsetToEnd)} chunks to check")
-
-print("Iterating through chunks in dump file")
-def getImageName(content):
-	" Given an array of text-content lines, tries to return an infoxbox image name, or None "
-	# Doesn't try and find images in outside-infobox [[File:...]] and <imagemap> sections
-	for line in content:
-		match = imageLineRegex.match(line)
-		if match != None:
-			imageName = match.group(1).strip()
-			if imageName == "":
-				return None
-			imageName = html.unescape(imageName)
-			# Account for {{...
-			if imageName.startswith("{"):
-				match = cssImgCropRegex.match(imageName)
-				if match == None:
-					return None
-				imageName = match.group(1)
-			# Account for [[File:...|...]]
-			if imageName.startswith("["):
-				match = bracketImageRegex.match(imageName)
-				if match == None:
-					return None
-				imageName = match.group(1)
-			# Account for <!--
-			if imageName.find("<!--") != -1:
-				return None
-			# Remove an initial 'File:'
-			if imageName.startswith("File:"):
-				imageName = imageName[5:]
-			# Remove an initial 'Image:'
-			if imageName.startswith("Image:"):
-				imageName = imageName[6:]
-			# Check for extension
-			match = imageNameRegex.match(imageName)
-			if match != None:
-				imageName = match.group(0)
-				imageName = urllib.parse.unquote(imageName)
-				imageName = html.unescape(imageName) # Intentionally unescaping again (handles some odd cases)
-				imageName = imageName.replace("_", " ")
-				return imageName
-			# Exclude lines like: | image = &lt;imagemap&gt;
-			return None
-	return None
-with open(dumpFile, mode='rb') as file:
-	iterNum = 0
-	for (pageOffset, endOffset) in offsetToEnd.items():
-		iterNum += 1
-		if iterNum % 100 == 0:
-			print(f"At iteration {iterNum}")
-		#
-		pageIds = offsetToPageids[pageOffset]
-		# Jump to chunk
-		file.seek(pageOffset)
-		compressedData = file.read(None if endOffset == -1 else endOffset - pageOffset)
-		data = bz2.BZ2Decompressor().decompress(compressedData).decode()
-		# Look in chunk for pages
-		lines = data.splitlines()
-		lineIdx = 0
-		while lineIdx < len(lines):
-			# Look for <page>
-			if lines[lineIdx].lstrip() != "<page>":
-				lineIdx += 1
-				continue
-			# Check page id
-			lineIdx += 3
-			idLine = lines[lineIdx].lstrip()
-			match = idLineRegex.fullmatch(idLine)
-			if match == None or int(match.group(1)) not in pageIds:
-				lineIdx += 1
-				continue
-			pageId = int(match.group(1))
-			lineIdx += 1
-			# Look for <text> in <page>
-			foundText = False
-			while lineIdx < len(lines):
-				if not lines[lineIdx].lstrip().startswith("<text "):
-					lineIdx += 1
-					continue
-				foundText = True
-				# Get text content
-				content = []
-				line = lines[lineIdx]
-				content.append(line[line.find(">") + 1:])
-				lineIdx += 1
-				foundTextEnd = False
-				while lineIdx < len(lines):
-					line = lines[lineIdx]
-					if not line.endswith("</text>"):
-						content.append(line)
-						lineIdx += 1
-						continue
-					foundTextEnd = True
-					content.append(line[:line.rfind("</text>")])
-					# Look for image-filename
-					imageName = getImageName(content)
-					imgDbCur.execute("INSERT into page_imgs VALUES (?, ?)", (pageId, imageName))
-					break
-				if not foundTextEnd:
-					print(f"WARNING: Did not find </text> for page id {pageId}")
-				break
-			if not foundText:
-				print(f"WARNING: Did not find <text> for page id {pageId}")
-
-print("Closing databases")
-indexDbCon.close()
-imgDbCon.commit()
-imgDbCon.close()
diff --git a/backend/data/enwiki/lookupPage.py b/backend/data/enwiki/lookupPage.py
deleted file mode 100755
index 1a90851..0000000
--- a/backend/data/enwiki/lookupPage.py
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/usr/bin/python3
-
-import sys, re
-import bz2
-import sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]} title1
-
-Looks up a page with title title1 in the wiki dump, using
-the dump-index db, and prints the corresponding <page>.
-"""
-if len(sys.argv) != 2:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2"
-indexDb = "dumpIndex.db"
-pageTitle = sys.argv[1].replace("_", " ")
-
-print("Looking up offset in index db")
-dbCon = sqlite3.connect(indexDb)
-dbCur = dbCon.cursor()
-query = "SELECT title, offset, next_offset FROM offsets WHERE title = ?"
-row = dbCur.execute(query, (pageTitle,)).fetchone()
-if row == None:
-	print("Title not found")
-	sys.exit(0)
-_, pageOffset, endOffset = row
-dbCon.close()
-print(f"Found chunk at offset {pageOffset}")
-
-print("Reading from wiki dump")
-content = []
-with open(dumpFile, mode='rb') as file:
-	# Get uncompressed chunk
-	file.seek(pageOffset)
-	compressedData = file.read(None if endOffset == -1 else endOffset - pageOffset)
-	data = bz2.BZ2Decompressor().decompress(compressedData).decode()
-	# Look in chunk for page
-	lines = data.splitlines()
-	lineIdx = 0
-	found = False
-	pageNum = 0
-	while not found:
-		line = lines[lineIdx]
-		if line.lstrip() == "<page>":
-			pageNum += 1
-			if pageNum > 100:
-				print("ERROR: Did not find title after 100 pages")
-				break
-			lineIdx += 1
-			titleLine = lines[lineIdx]
-			if titleLine.lstrip() == '<title>' + pageTitle + '</title>':
-				found = True
-				print(f"Found title in chunk as page {pageNum}")
-				content.append(line)
-				content.append(titleLine)
-				while True:
-					lineIdx += 1
-					line = lines[lineIdx]
-					content.append(line)
-					if line.lstrip() == "</page>":
-						break
-		lineIdx += 1
-
-print("Content: ")
-print("\n".join(content))
diff --git a/backend/data/eol/README.md b/backend/data/eol/README.md
deleted file mode 100644
index 8c527a8..0000000
--- a/backend/data/eol/README.md
+++ /dev/null
@@ -1,26 +0,0 @@
-This directory holds files obtained from/using the [Encyclopedia of Life](https://eol.org/).
-
-# Name Data Files
--   vernacularNames.csv <br>
-    Obtained from <https://opendata.eol.org/dataset/vernacular-names> on 24/04/2022 (last updated on 27/10/2020).
-    Contains alternative-name data from EOL.
-
-# Image Metadata Files
--   imagesList.tgz <br>
-    Obtained from <https://opendata.eol.org/dataset/images-list> on 24/04/2022 (last updated on 05/02/2020).
-    Contains metadata for images from EOL.
--   imagesList/ <br>
-    Extracted from imagesList.tgz.
--   genImagesListDb.sh <br>
-    Creates a database, and imports imagesList/*.csv files into it.
--   imagesList.db <br>
-    Created by running genImagesListDb.sh <br>
-    Tables: <br>
-    -   `images`:
-        `content_id INT PRIMARY KEY, page_id INT, source_url TEXT, copy_url TEXT, license TEXT, copyright_owner TEXT`
-
-# Image Generation Files
--   downloadImgs.py <br>
-    Used to download image files into imgsForReview/.
--   reviewImgs.py <br>
-    Used to review images in imgsForReview/, moving acceptable ones into imgs/.
diff --git a/backend/data/eol/downloadImgs.py b/backend/data/eol/downloadImgs.py
deleted file mode 100755
index 96bc085..0000000
--- a/backend/data/eol/downloadImgs.py
+++ /dev/null
@@ -1,147 +0,0 @@
-#!/usr/bin/python3
-
-import sys, re, os, random
-import sqlite3
-import urllib.parse, requests
-import time
-from threading import Thread
-import signal
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-For some set of EOL IDs, downloads associated images from URLs in
-an image-list database. Uses multiple downloading threads.
-
-May obtain multiple images per ID. The images will get names
-with the form 'eolId1 contentId1.ext1'.
-
-SIGINT causes the program to finish ongoing downloads and exit.
-The program can be re-run to continue downloading. It looks for
-already-downloaded files, and continues after the one with
-highest EOL ID.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-# In testing, this downloaded about 70k images, over a few days
-
-imagesListDb = "imagesList.db"
-def getInputEolIds():
-	eolIds = set()
-	dbCon = sqlite3.connect("../data.db")
-	dbCur = dbCon.cursor()
-	for (id,) in dbCur.execute("SELECT id FROM eol_ids"):
-		eolIds.add(id)
-	dbCon.close()
-	return eolIds
-outDir = "imgsForReview/"
-MAX_IMGS_PER_ID = 3
-MAX_THREADS = 5
-POST_DL_DELAY_MIN = 2 # Minimum delay in seconds to pause after download before starting another (for each thread)
-POST_DL_DELAY_MAX = 3
-LICENSE_REGEX = r"cc-by((-nc)?(-sa)?(-[234]\.[05])?)|cc-publicdomain|cc-0-1\.0|public domain"
-
-print("Getting input EOL IDs")
-eolIds = getInputEolIds()
-print("Getting EOL IDs to download for")
-# Get IDs from images-list db
-imgDbCon = sqlite3.connect(imagesListDb)
-imgCur = imgDbCon.cursor()
-imgListIds = set()
-for (pageId,) in imgCur.execute("SELECT DISTINCT page_id FROM images"):
-	imgListIds.add(pageId)
-# Get set intersection, and sort into list
-eolIds = eolIds.intersection(imgListIds)
-eolIds = sorted(eolIds)
-print(f"Result: {len(eolIds)} EOL IDs")
-
-print("Checking output directory")
-if not os.path.exists(outDir):
-	os.mkdir(outDir)
-print("Finding next ID to download for")
-nextIdx = 0
-fileList = os.listdir(outDir)
-ids = [int(filename.split(" ")[0]) for filename in fileList]
-if len(ids) > 0:
-	ids.sort()
-	nextIdx = eolIds.index(ids[-1]) + 1
-if nextIdx == len(eolIds):
-	print("No IDs left. Exiting...")
-	sys.exit(0)
-
-print("Starting download threads")
-numThreads = 0
-threadException = None # Used for ending main thread after a non-main thread exception
-# Handle SIGINT signals
-interrupted = False
-oldHandler = None
-def onSigint(sig, frame):
-	global interrupted
-	interrupted = True
-	signal.signal(signal.SIGINT, oldHandler)
-oldHandler = signal.signal(signal.SIGINT, onSigint)
-# Function for threads to execute
-def downloadImg(url, outFile):
-	global numThreads, threadException
-	try:
-		data = requests.get(url)
-		with open(outFile, 'wb') as file:
-			file.write(data.content)
-		time.sleep(random.random() * (POST_DL_DELAY_MAX - POST_DL_DELAY_MIN) + POST_DL_DELAY_MIN)
-	except Exception as e:
-		print(f"Error while downloading to {outFile}: {str(e)}", file=sys.stderr)
-		threadException = e
-	numThreads -= 1
-# Manage downloading
-for idx in range(nextIdx, len(eolIds)):
-	eolId = eolIds[idx]
-	# Get image urls
-	imgDataList = []
-	ownerSet = set() # Used to get images from different owners, for variety
-	exitLoop = False
-	query = "SELECT content_id, copy_url, license, copyright_owner FROM images WHERE page_id = ?"
-	for (contentId, url, license, copyrightOwner) in imgCur.execute(query, (eolId,)):
-		if url.startswith("data/"):
-			url = "https://content.eol.org/" + url
-		urlParts = urllib.parse.urlparse(url)
-		extension = os.path.splitext(urlParts.path)[1]
-		if len(extension) <= 1:
-			print(f"WARNING: No filename extension found in URL {url}", file=sys.stderr)
-			continue
-		# Check image-quantity limit
-		if len(ownerSet) == MAX_IMGS_PER_ID:
-			break
-		# Check for skip conditions
-		if re.fullmatch(LICENSE_REGEX, license) == None:
-			continue
-		if len(copyrightOwner) > 100: # Avoid certain copyrightOwner fields that seem long and problematic
-			continue
-		if copyrightOwner in ownerSet:
-			continue
-		ownerSet.add(copyrightOwner)
-		# Determine output filename
-		outPath = f"{outDir}{eolId} {contentId}{extension}"
-		if os.path.exists(outPath):
-			print(f"WARNING: {outPath} already exists. Skipping download.")
-			continue
-		# Check thread limit
-		while numThreads == MAX_THREADS:
-			time.sleep(1)
-		# Wait for threads after an interrupt or thread-exception
-		if interrupted or threadException != None:
-			print("Waiting for existing threads to end")
-			while numThreads > 0:
-				time.sleep(1)
-			exitLoop = True
-			break
-		# Perform download
-		print(f"Downloading image to {outPath}")
-		numThreads += 1
-		thread = Thread(target=downloadImg, args=(url, outPath), daemon=True)
-		thread.start()
-	if exitLoop:
-		break
-# Close images-list db
-print("Finished downloading")
-imgDbCon.close()
diff --git a/backend/data/eol/genImagesListDb.sh b/backend/data/eol/genImagesListDb.sh
deleted file mode 100755
index 87dd840..0000000
--- a/backend/data/eol/genImagesListDb.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-set -e
-
-# Combine CSV files into one, skipping header lines
-cat imagesList/media_*_{1..58}.csv | tail -n +2 > imagesList.csv
-# Create database, and import the CSV file
-sqlite3 imagesList.db <<END
-CREATE TABLE images (
-	content_id INT PRIMARY KEY, page_id INT, source_url TEXT, copy_url TEXT, license TEXT, copyright_owner TEXT);
-.mode csv
-.import 'imagesList.csv' images
-END
diff --git a/backend/data/eol/reviewImgs.py b/backend/data/eol/reviewImgs.py
deleted file mode 100755
index ecdf7ab..0000000
--- a/backend/data/eol/reviewImgs.py
+++ /dev/null
@@ -1,205 +0,0 @@
-#!/usr/bin/python3
-
-import sys, re, os, time
-import sqlite3
-import tkinter as tki
-from tkinter import ttk
-import PIL
-from PIL import ImageTk, Image, ImageOps
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Provides a GUI for reviewing images. Looks in a for-review directory for
-images named 'eolId1 contentId1.ext1', and, for each EOL ID, enables the user to
-choose an image to keep, or reject all. Also provides image rotation.
-Chosen images are placed in another directory, and rejected ones are deleted.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-imgDir = "imgsForReview/"
-outDir = "imgs/"
-extraInfoDbCon = sqlite3.connect("../data.db")
-extraInfoDbCur = extraInfoDbCon.cursor()
-def getExtraInfo(eolId):
-	global extraInfoDbCur
-	query = "SELECT names.alt_name FROM" \
-		" names INNER JOIN eol_ids ON eol_ids.name = names.name" \
-		" WHERE id = ? and pref_alt = 1"
-	row = extraInfoDbCur.execute(query, (eolId,)).fetchone()
-	if row != None:
-		return f"Reviewing EOL ID {eolId}, aka \"{row[0]}\""
-	else:
-		return f"Reviewing EOL ID {eolId}"
-IMG_DISPLAY_SZ = 400
-MAX_IMGS_PER_ID = 3
-IMG_BG_COLOR = (88, 28, 135)
-PLACEHOLDER_IMG = Image.new("RGB", (IMG_DISPLAY_SZ, IMG_DISPLAY_SZ), IMG_BG_COLOR)
-
-print("Checking output directory")
-if not os.path.exists(outDir):
-	os.mkdir(outDir)
-print("Getting input image list")
-imgList = os.listdir(imgDir)
-imgList.sort(key=lambda s: int(s.split(" ")[0]))
-if len(imgList) == 0:
-	print("No input images found")
-	sys.exit(0)
-
-class EolImgReviewer:
-	" Provides the GUI for reviewing images "
-	def __init__(self, root, imgList):
-		self.root = root
-		root.title("EOL Image Reviewer")
-		# Setup main frame
-		mainFrame = ttk.Frame(root, padding="5 5 5 5")
-		mainFrame.grid(column=0, row=0, sticky=(tki.N, tki.W, tki.E, tki.S))
-		root.columnconfigure(0, weight=1)
-		root.rowconfigure(0, weight=1)
-		# Set up images-to-be-reviewed frames
-		self.imgs = [PLACEHOLDER_IMG] * MAX_IMGS_PER_ID # Stored as fields for use in rotation
-		self.photoImgs = list(map(lambda img: ImageTk.PhotoImage(img), self.imgs)) # Image objects usable by tkinter
-			# These need a persistent reference for some reason (doesn't display otherwise)
-		self.labels = []
-		for i in range(MAX_IMGS_PER_ID):
-			frame = ttk.Frame(mainFrame, width=IMG_DISPLAY_SZ, height=IMG_DISPLAY_SZ)
-			frame.grid(column=i, row=0)
-			label = ttk.Label(frame, image=self.photoImgs[i])
-			label.grid(column=0, row=0)
-			self.labels.append(label)
-		# Add padding
-		for child in mainFrame.winfo_children():
-			child.grid_configure(padx=5, pady=5)
-		# Add keyboard bindings
-		root.bind("<q>", self.quit)
-		root.bind("<Key-j>", lambda evt: self.accept(0))
-		root.bind("<Key-k>", lambda evt: self.accept(1))
-		root.bind("<Key-l>", lambda evt: self.accept(2))
-		root.bind("<Key-i>", lambda evt: self.reject())
-		root.bind("<Key-a>", lambda evt: self.rotate(0))
-		root.bind("<Key-s>", lambda evt: self.rotate(1))
-		root.bind("<Key-d>", lambda evt: self.rotate(2))
-		root.bind("<Key-A>", lambda evt: self.rotate(0, True))
-		root.bind("<Key-S>", lambda evt: self.rotate(1, True))
-		root.bind("<Key-D>", lambda evt: self.rotate(2, True))
-		# Initialise images to review
-		self.imgList = imgList
-		self.imgListIdx = 0
-		self.nextEolId = 0
-		self.nextImgNames = []
-		self.rotations = []
-		self.getNextImgs()
-		# For displaying extra info
-		self.numReviewed = 0
-		self.startTime = time.time()
-	def getNextImgs(self):
-		" Updates display with new images to review, or ends program "
-		# Gather names of next images to review
-		for i in range(MAX_IMGS_PER_ID):
-			if self.imgListIdx == len(self.imgList):
-				if i == 0:
-					self.quit()
-					return
-				break
-			imgName = self.imgList[self.imgListIdx]
-			eolId = int(re.match(r"(\d+) (\d+)", imgName).group(1))
-			if i == 0:
-				self.nextEolId = eolId
-				self.nextImgNames = [imgName]
-				self.rotations = [0]
-			else:
-				if self.nextEolId != eolId:
-					break
-				self.nextImgNames.append(imgName)
-				self.rotations.append(0)
-			self.imgListIdx += 1
-		# Update displayed images
-		idx = 0
-		while idx < MAX_IMGS_PER_ID:
-			if idx < len(self.nextImgNames):
-				try:
-					img = Image.open(imgDir + self.nextImgNames[idx])
-					img = ImageOps.exif_transpose(img)
-				except PIL.UnidentifiedImageError:
-					os.remove(imgDir + self.nextImgNames[idx])
-					del self.nextImgNames[idx]
-					del self.rotations[idx]
-					continue
-				self.imgs[idx] = self.resizeImgForDisplay(img)
-			else:
-				self.imgs[idx] = PLACEHOLDER_IMG
-			self.photoImgs[idx] = ImageTk.PhotoImage(self.imgs[idx])
-			self.labels[idx].config(image=self.photoImgs[idx])
-			idx += 1
-		# Restart if all image files non-recognisable
-		if len(self.nextImgNames) == 0:
-			self.getNextImgs()
-			return
-		# Update title
-		firstImgIdx = self.imgListIdx - len(self.nextImgNames) + 1
-		lastImgIdx = self.imgListIdx
-		title = getExtraInfo(self.nextEolId)
-		title += f" (imgs {firstImgIdx} to {lastImgIdx} out of {len(self.imgList)})"
-		self.root.title(title)
-	def accept(self, imgIdx):
-		" React to a user selecting an image "
-		if imgIdx >= len(self.nextImgNames):
-			print("Invalid selection")
-			return
-		for i in range(len(self.nextImgNames)):
-			inFile = imgDir + self.nextImgNames[i]
-			if i == imgIdx: # Move accepted image, rotating if needed
-				outFile = outDir + self.nextImgNames[i]
-				img = Image.open(inFile)
-				img = ImageOps.exif_transpose(img)
-				if self.rotations[i] != 0:
-					img = img.rotate(self.rotations[i], expand=True)
-				img.save(outFile)
-				os.remove(inFile)
-			else: # Delete non-accepted image
-				os.remove(inFile)
-		self.numReviewed += 1
-		self.getNextImgs()
-	def reject(self):
-		" React to a user rejecting all images of a set "
-		for i in range(len(self.nextImgNames)):
-			os.remove(imgDir + self.nextImgNames[i])
-		self.numReviewed += 1
-		self.getNextImgs()
-	def rotate(self, imgIdx, anticlockwise = False):
-		" Respond to a user rotating an image "
-		deg = -90 if not anticlockwise else 90
-		self.imgs[imgIdx] = self.imgs[imgIdx].rotate(deg)
-		self.photoImgs[imgIdx] = ImageTk.PhotoImage(self.imgs[imgIdx])
-		self.labels[imgIdx].config(image=self.photoImgs[imgIdx])
-		self.rotations[imgIdx] = (self.rotations[imgIdx] + deg) % 360
-	def quit(self, e = None):
-		global extraInfoDbCon
-		print(f"Number reviewed: {self.numReviewed}")
-		timeElapsed = time.time() - self.startTime
-		print(f"Time elapsed: {timeElapsed:.2f} seconds")
-		if self.numReviewed > 0:
-			print(f"Avg time per review: {timeElapsed/self.numReviewed:.2f} seconds")
-		extraInfoDbCon.close()
-		self.root.destroy()
-	def resizeImgForDisplay(self, img):
-		" Returns a copy of an image, shrunk to fit in it's frame (keeps aspect ratio), and with a background "
-		if max(img.width, img.height) > IMG_DISPLAY_SZ:
-			if (img.width > img.height):
-				newHeight = int(img.height * IMG_DISPLAY_SZ/img.width)
-				img = img.resize((IMG_DISPLAY_SZ, newHeight))
-			else:
-				newWidth = int(img.width * IMG_DISPLAY_SZ / img.height)
-				img = img.resize((newWidth, IMG_DISPLAY_SZ))
-		bgImg = PLACEHOLDER_IMG.copy()
-		bgImg.paste(img, box=(
-			int((IMG_DISPLAY_SZ - img.width) / 2),
-			int((IMG_DISPLAY_SZ - img.height) / 2)))
-		return bgImg
-# Create GUI and defer control
-print("Starting GUI")
-root = tki.Tk()
-EolImgReviewer(root, imgList)
-root.mainloop()
diff --git a/backend/data/genDbpData.py b/backend/data/genDbpData.py
deleted file mode 100755
index df3a6be..0000000
--- a/backend/data/genDbpData.py
+++ /dev/null
@@ -1,247 +0,0 @@
-#!/usr/bin/python3
-
-import sys, os, re
-import sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Reads a database containing data from DBpedia, and tries to associate
-DBpedia IRIs with nodes in a database, adding short-descriptions for them.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-dbpediaDb = "dbpedia/descData.db"
-namesToSkipFile = "pickedEnwikiNamesToSkip.txt"
-pickedLabelsFile = "pickedDbpLabels.txt"
-dbFile = "data.db"
-rootNodeName = "cellular organisms"
-rootLabel = "organism" # Will be associated with root node
-# Got about 400k descriptions when testing
-
-print("Opening databases")
-dbpCon = sqlite3.connect(dbpediaDb)
-dbpCur = dbpCon.cursor()
-dbCon = sqlite3.connect(dbFile)
-dbCur = dbCon.cursor()
-
-print("Getting node names")
-nodeNames = set()
-for (name,) in dbCur.execute("SELECT name from nodes"):
-	nodeNames.add(name)
-
-print("Checking for names to skip")
-oldSz = len(nodeNames)
-if os.path.exists(namesToSkipFile):
-	with open(namesToSkipFile) as file:
-		for line in file:
-			nodeNames.remove(line.rstrip())
-print(f"Skipping {oldSz - len(nodeNames)} nodes")
-
-print("Reading disambiguation-page labels")
-disambigLabels = set()
-query = "SELECT labels.iri from labels INNER JOIN disambiguations ON labels.iri = disambiguations.iri"
-for (label,) in dbpCur.execute(query):
-	disambigLabels.add(label)
-
-print("Trying to associate nodes with DBpedia labels")
-nodeToLabel = {}
-nameVariantRegex = re.compile(r"(.*) \(([^)]+)\)") # Used to recognise labels like 'Thor (shrimp)'
-nameToVariants = {} # Maps node names to lists of matching labels
-iterNum = 0
-for (label,) in dbpCur.execute("SELECT label from labels"):
-	iterNum += 1
-	if iterNum % 1e5 == 0:
-		print(f"At iteration {iterNum}")
-	#
-	if label in disambigLabels:
-		continue
-	name = label.lower()
-	if name in nodeNames:
-		if name not in nameToVariants:
-			nameToVariants[name] = [label]
-		elif label not in nameToVariants[name]:
-			nameToVariants[name].append(label)
-	else:
-		match = nameVariantRegex.fullmatch(name)
-		if match != None:
-			subName = match.group(1)
-			if subName in nodeNames and match.group(2) != "disambiguation":
-				if subName not in nameToVariants:
-					nameToVariants[subName] = [label]
-				elif name not in nameToVariants[subName]:
-					nameToVariants[subName].append(label)
-# Associate labels without conflicts
-for (name, variants) in nameToVariants.items():
-	if len(variants) == 1:
-		nodeToLabel[name] = variants[0]
-for name in nodeToLabel:
-	del nameToVariants[name]
-# Special case for root node
-nodeToLabel[rootNodeName] = rootLabel
-if rootNodeName in nameToVariants:
-	del nameToVariants["cellular organisms"]
-
-print("Trying to resolve {len(nameToVariants)} conflicts")
-def resolveWithPickedLabels():
-	" Attempts to resolve conflicts using a picked-names file "
-	with open(pickedLabelsFile) as file:
-		for line in file:
-			(name, _, label) = line.rstrip().partition("|")
-			if name not in nameToVariants:
-				print(f"WARNING: No conflict found for name \"{name}\"", file=sys.stderr)
-				continue
-			if label == "":
-				del nameToVariants[name]
-			else:
-				if label not in nameToVariants[name]:
-					print(f"INFO: Picked label \"{label}\" for name \"{name}\" outside choice set", file=sys.stderr)
-				nodeToLabel[name] = label
-				del nameToVariants[name]
-def resolveWithCategoryList():
-	"""
-	Attempts to resolve conflicts by looking for labels like 'name1 (category1)',
-	and choosing those with a category1 that seems 'biological'.
-	Does two passes, using more generic categories first. This helps avoid stuff like
-	Pan being classified as a horse instead of an ape.
-	"""
-	generalCategories = {
-		"species", "genus",
-		"plant", "fungus", "animal",
-		"annelid", "mollusc", "arthropod", "crustacean", "insect", "bug",
-		"fish", "amphibian", "reptile", "bird", "mammal",
-	}
-	specificCategories = {
-		"protist", "alveolate", "dinoflagellates",
-		"orchid", "poaceae", "fern", "moss", "alga",
-		"bryozoan", "hydrozoan",
-		"sponge", "cnidarian", "coral", "polychaete", "echinoderm",
-		"bivalve", "gastropod", "chiton",
-		"shrimp", "decapod", "crab", "barnacle", "copepod",
-		"arachnid", "spider", "harvestman", "mite",
-		"dragonfly", "mantis", "cicada", "grasshopper", "planthopper",
-			"beetle", "fly", "butterfly", "moth", "wasp",
-		"catfish",
-		"frog",
-		"lizard",
-		"horse", "sheep", "cattle", "mouse",
-	}
-	namesToRemove = set()
-	for (name, variants) in nameToVariants.items():
-		found = False
-		for label in variants:
-			match = nameVariantRegex.match(label)
-			if match != None and match.group(2) in generalCategories:
-				nodeToLabel[name] = label
-				namesToRemove.add(name)
-				found = True
-				break
-		if not found:
-			for label in variants:
-				match = nameVariantRegex.match(label)
-				if match != None and match.group(2) in specificCategories:
-					nodeToLabel[name] = label
-					namesToRemove.add(name)
-					break
-	for name in namesToRemove:
-		del nameToVariants[name]
-def resolveWithTypeData():
-	" Attempts to resolve conflicts using DBpedia's type data "
-	taxonTypes = { # Obtained from the DBpedia ontology
-		"http://dbpedia.org/ontology/Species",
-		"http://dbpedia.org/ontology/Archaea",
-		"http://dbpedia.org/ontology/Bacteria",
-		"http://dbpedia.org/ontology/Eukaryote",
-		"http://dbpedia.org/ontology/Plant",
-		"http://dbpedia.org/ontology/ClubMoss",
-		"http://dbpedia.org/ontology/Conifer",
-		"http://dbpedia.org/ontology/CultivatedVariety",
-		"http://dbpedia.org/ontology/Cycad",
-		"http://dbpedia.org/ontology/Fern",
-		"http://dbpedia.org/ontology/FloweringPlant",
-		"http://dbpedia.org/ontology/Grape",
-		"http://dbpedia.org/ontology/Ginkgo",
-		"http://dbpedia.org/ontology/Gnetophytes",
-		"http://dbpedia.org/ontology/GreenAlga",
-		"http://dbpedia.org/ontology/Moss",
-		"http://dbpedia.org/ontology/Fungus",
-		"http://dbpedia.org/ontology/Animal",
-		"http://dbpedia.org/ontology/Fish",
-		"http://dbpedia.org/ontology/Crustacean",
-		"http://dbpedia.org/ontology/Mollusca",
-		"http://dbpedia.org/ontology/Insect",
-		"http://dbpedia.org/ontology/Arachnid",
-		"http://dbpedia.org/ontology/Amphibian",
-		"http://dbpedia.org/ontology/Reptile",
-		"http://dbpedia.org/ontology/Bird",
-		"http://dbpedia.org/ontology/Mammal",
-		"http://dbpedia.org/ontology/Cat",
-		"http://dbpedia.org/ontology/Dog",
-		"http://dbpedia.org/ontology/Horse",
-	}
-	iterNum = 0
-	for (label, type) in dbpCur.execute("SELECT label, type from labels INNER JOIN types on labels.iri = types.iri"):
-		iterNum += 1
-		if iterNum % 1e5 == 0:
-			print(f"At iteration {iterNum}")
-		#
-		if type in taxonTypes:
-			name = label.lower()
-			if name in nameToVariants:
-				nodeToLabel[name] = label
-				del nameToVariants[name]
-			else:
-				match = nameVariantRegex.fullmatch(name)
-				if match != None:
-					name = match.group(1)
-					if name in nameToVariants:
-						nodeToLabel[name] = label
-						del nameToVariants[name]
-#resolveWithTypeData()
-#resolveWithCategoryList()
-resolveWithPickedLabels()
-print(f"Remaining number of conflicts: {len(nameToVariants)}")
-
-print("Getting node IRIs")
-nodeToIri = {}
-for (name, label) in nodeToLabel.items():
-	(iri,) = dbpCur.execute("SELECT iri FROM labels where label = ? COLLATE NOCASE", (label,)).fetchone()
-	nodeToIri[name] = iri
-
-print("Resolving redirects")
-redirectingIriSet = set()
-iterNum = 0
-for (name, iri) in nodeToIri.items():
-	iterNum += 1
-	if iterNum % 1e4 == 0:
-		print(f"At iteration {iterNum}")
-	#
-	row = dbpCur.execute("SELECT target FROM redirects where iri = ?", (iri,)).fetchone()
-	if row != None:
-		nodeToIri[name] = row[0]
-		redirectingIriSet.add(name)
-
-print("Adding description tables")
-dbCur.execute("CREATE TABLE wiki_ids (name TEXT PRIMARY KEY, id INT, redirected INT)")
-dbCur.execute("CREATE INDEX wiki_id_idx ON wiki_ids(id)")
-dbCur.execute("CREATE TABLE descs (wiki_id INT PRIMARY KEY, desc TEXT, from_dbp INT)")
-iterNum = 0
-for (name, iri) in nodeToIri.items():
-	iterNum += 1
-	if iterNum % 1e4 == 0:
-		print(f"At iteration {iterNum}")
-	#
-	query = "SELECT abstract, id FROM abstracts INNER JOIN ids ON abstracts.iri = ids.iri WHERE ids.iri = ?"
-	row = dbpCur.execute(query, (iri,)).fetchone()
-	if row != None:
-		desc, wikiId = row
-		dbCur.execute("INSERT INTO wiki_ids VALUES (?, ?, ?)", (name, wikiId, 1 if name in redirectingIriSet else 0))
-		dbCur.execute("INSERT OR IGNORE INTO descs VALUES (?, ?, ?)", (wikiId, desc, 1))
-
-print("Closing databases")
-dbCon.commit()
-dbCon.close()
-dbpCon.commit()
-dbpCon.close()
diff --git a/backend/data/genEnwikiDescData.py b/backend/data/genEnwikiDescData.py
deleted file mode 100755
index d3f93ed..0000000
--- a/backend/data/genEnwikiDescData.py
+++ /dev/null
@@ -1,102 +0,0 @@
-#!/usr/bin/python3
-
-import sys, re, os
-import sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Reads a database containing data from Wikipedia, and tries to associate
-wiki pages with nodes in the database, and add descriptions for nodes
-that don't have them.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-enwikiDb = "enwiki/descData.db"
-dbFile = "data.db"
-namesToSkipFile = "pickedEnwikiNamesToSkip.txt"
-pickedLabelsFile = "pickedEnwikiLabels.txt"
-# Got about 25k descriptions when testing
-
-print("Opening databases")
-enwikiCon = sqlite3.connect(enwikiDb)
-enwikiCur = enwikiCon.cursor()
-dbCon = sqlite3.connect(dbFile)
-dbCur = dbCon.cursor()
-
-print("Checking for names to skip")
-namesToSkip = set()
-if os.path.exists(namesToSkipFile):
-	with open(namesToSkipFile) as file:
-		for line in file:
-			namesToSkip.add(line.rstrip())
-	print(f"Found {len(namesToSkip)}")
-print("Checking for picked-titles")
-nameToPickedTitle = {}
-if os.path.exists(pickedLabelsFile):
-	with open(pickedLabelsFile) as file:
-		for line in file:
-			(name, _, title) = line.rstrip().partition("|")
-			nameToPickedTitle[name.lower()] = title
-print(f"Found {len(nameToPickedTitle)}")
-
-print("Getting names of nodes without descriptions")
-nodeNames = set()
-query = "SELECT nodes.name FROM nodes LEFT JOIN wiki_ids ON nodes.name = wiki_ids.name WHERE wiki_ids.id IS NULL"
-for (name,) in dbCur.execute(query):
-	nodeNames.add(name)
-print(f"Found {len(nodeNames)}")
-nodeNames.difference_update(namesToSkip)
-
-print("Associating nodes with page IDs")
-nodeToPageId = {}
-iterNum = 0
-for name in nodeNames:
-	iterNum += 1
-	if iterNum % 1e4 == 0:
-		print(f"At iteration {iterNum}")
-	#
-	if name not in nameToPickedTitle:
-		row = enwikiCur.execute("SELECT id FROM pages WHERE pages.title = ? COLLATE NOCASE", (name,)).fetchone()
-		if row != None:
-			nodeToPageId[name] = row[0]
-	else:
-		title = nameToPickedTitle[name]
-		row = enwikiCur.execute("SELECT id FROM pages WHERE pages.title = ?", (title,)).fetchone()
-		if row != None:
-			nodeToPageId[name] = row[0]
-		else:
-			print("WARNING: Picked title {title} not found", file=sys.stderr)
-
-print("Resolving redirects")
-redirectingNames = set()
-iterNum = 0
-for (name, pageId) in nodeToPageId.items():
-	iterNum += 1
-	if iterNum % 1e3 == 0:
-		print(f"At iteration {iterNum}")
-	#
-	query = "SELECT pages.id FROM redirects INNER JOIN pages ON redirects.target = pages.title WHERE redirects.id = ?"
-	row = enwikiCur.execute(query, (pageId,)).fetchone()
-	if row != None:
-		nodeToPageId[name] = row[0]
-		redirectingNames.add(name)
-
-print("Adding description data")
-iterNum = 0
-for (name, pageId) in nodeToPageId.items():
-	iterNum += 1
-	if iterNum % 1e3 == 0:
-		print(f"At iteration {iterNum}")
-	#
-	row = enwikiCur.execute("SELECT desc FROM descs where descs.id = ?", (pageId,)).fetchone()
-	if row != None:
-		dbCur.execute("INSERT INTO wiki_ids VALUES (?, ?, ?)", (name, pageId, 1 if name in redirectingNames else 0))
-		dbCur.execute("INSERT OR IGNORE INTO descs VALUES (?, ?, ?)", (pageId, row[0], 0))
-
-print("Closing databases")
-dbCon.commit()
-dbCon.close()
-enwikiCon.close()
diff --git a/backend/data/genEnwikiNameData.py b/backend/data/genEnwikiNameData.py
deleted file mode 100755
index 7ad61d1..0000000
--- a/backend/data/genEnwikiNameData.py
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/usr/bin/python3
-
-import sys, re
-import sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Reads from a database containing data from Wikipdia, along with
-node and wiki-id information from the database, and use wikipedia
-page-redirect information to add additional alt-name data.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-enwikiDb = "enwiki/descData.db"
-dbFile = "data.db"
-altNameRegex = re.compile(r"[a-zA-Z]+")
-	# Avoids names like 'Evolution of Elephants', 'Banana fiber', 'Fish (zoology)',
-
-print("Opening databases")
-enwikiCon = sqlite3.connect(enwikiDb)
-enwikiCur = enwikiCon.cursor()
-dbCon = sqlite3.connect(dbFile)
-dbCur = dbCon.cursor()
-
-print("Getting nodes with wiki IDs")
-nodeToWikiId = {}
-for (nodeName, wikiId) in dbCur.execute("SELECT name, id from wiki_ids"):
-	nodeToWikiId[nodeName] = wikiId
-print(f"Found {len(nodeToWikiId)}")
-
-print("Iterating through nodes, finding names that redirect to them")
-nodeToAltNames = {}
-numAltNames = 0
-iterNum = 0
-for (nodeName, wikiId) in nodeToWikiId.items():
-	iterNum += 1
-	if iterNum % 1e4 == 0:
-		print(f"At iteration {iterNum}")
-	#
-	nodeToAltNames[nodeName] = set()
-	query = "SELECT p1.title FROM pages p1" \
-		" INNER JOIN redirects r1 ON p1.id = r1.id" \
-		" INNER JOIN pages p2 ON r1.target = p2.title WHERE p2.id = ?"
-	for (name,) in enwikiCur.execute(query, (wikiId,)):
-		if altNameRegex.fullmatch(name) != None and name.lower() != nodeName:
-			nodeToAltNames[nodeName].add(name.lower())
-			numAltNames += 1
-print(f"Found {numAltNames} alt-names")
-
-print("Excluding existing alt-names from the set")
-query = "SELECT alt_name FROM names WHERE alt_name IN ({})"
-iterNum = 0
-for (nodeName, altNames) in nodeToAltNames.items():
-	iterNum += 1
-	if iterNum % 1e4 == 0:
-		print(f"At iteration {iterNum}")
-	#
-	existingNames = set()
-	for (name,) in dbCur.execute(query.format(",".join(["?"] * len(altNames))), list(altNames)):
-		existingNames.add(name)
-	numAltNames -= len(existingNames)
-	altNames.difference_update(existingNames)
-print(f"Left with {numAltNames} alt-names")
-
-print("Adding alt-names to database")
-for (nodeName, altNames) in nodeToAltNames.items():
-	for altName in altNames:
-		dbCur.execute("INSERT INTO names VALUES (?, ?, ?, 'enwiki')", (nodeName, altName, 0))
-
-print("Closing databases")
-dbCon.commit()
-dbCon.close()
-enwikiCon.close()
diff --git a/backend/data/genEolNameData.py b/backend/data/genEolNameData.py
deleted file mode 100755
index dd33ee0..0000000
--- a/backend/data/genEolNameData.py
+++ /dev/null
@@ -1,184 +0,0 @@
-#!/usr/bin/python3
-
-import sys, re, os
-import html, csv, sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Reads files describing name data from the 'Encyclopedia of Life' site,
-tries to associate names with nodes in the database, and adds tables
-to represent associated names.
-
-Reads a vernacularNames.csv file:
-	Starts with a header line containing:
-		page_id, canonical_form, vernacular_string, language_code,
-		resource_name, is_preferred_by_resource, is_preferred_by_eol
-	The canonical_form and vernacular_string fields contain names
-		associated with the page ID. Names are not always unique to
-		particular page IDs.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-vnamesFile = "eol/vernacularNames.csv" # Had about 2.8e6 entries
-dbFile = "data.db"
-namesToSkip = {"unknown", "unknown species", "unidentified species"}
-pickedIdsFile = "pickedEolIds.txt"
-altsToSkipFile = "pickedEolAltsToSkip.txt"
-
-print("Reading in vernacular-names data")
-nameToPids = {} # 'pid' means 'Page ID'
-canonicalNameToPids = {}
-pidToNames = {}
-pidToPreferred = {} # Maps pids to 'preferred' names
-def updateMaps(name, pid, canonical, preferredAlt):
-	global namesToSkip, nameToPids, canonicalNameToPids, pidToNames, pidToPreferred
-	if name in namesToSkip:
-		return
-	if name not in nameToPids:
-		nameToPids[name] = {pid}
-	else:
-		nameToPids[name].add(pid)
-	if canonical:
-		if name not in canonicalNameToPids:
-			canonicalNameToPids[name] = {pid}
-		else:
-			canonicalNameToPids[name].add(pid)
-	if pid not in pidToNames:
-		pidToNames[pid] = {name}
-	else:
-		pidToNames[pid].add(name)
-	if preferredAlt:
-		pidToPreferred[pid] = name
-with open(vnamesFile, newline="") as csvfile:
-	reader = csv.reader(csvfile)
-	lineNum = 0
-	for row in reader:
-		lineNum += 1
-		if lineNum % 1e5 == 0:
-			print(f"At line {lineNum}")
-		# Skip header line
-		if lineNum == 1:
-			continue
-		# Parse line
-		pid = int(row[0])
-		name1 = re.sub(r"<[^>]+>", "", row[1].lower()) # Remove tags
-		name2 = html.unescape(row[2]).lower()
-		lang = row[3]
-		preferred = row[6] == "preferred"
-		# Add to maps
-		updateMaps(name1, pid, True, False)
-		if lang == "eng" and name2 != "":
-			updateMaps(name2, pid, False, preferred)
-
-print("Checking for manually-picked pids")
-nameToPickedPid = {}
-if os.path.exists(pickedIdsFile):
-	with open(pickedIdsFile) as file:
-		for line in file:
-			(name, _, eolId) = line.rstrip().partition("|")
-			nameToPickedPid[name] = None if eolId == "" else int(eolId)
-print(f"Found {len(nameToPickedPid)}")
-
-print("Checking for alt-names to skip")
-nameToAltsToSkip = {}
-numToSkip = 0
-if os.path.exists(altsToSkipFile):
-	with open(altsToSkipFile) as file:
-		for line in file:
-			(name, _, altName) = line.rstrip().partition("|")
-			if name not in nameToAltsToSkip:
-				nameToAltsToSkip[name] = [altName]
-			else:
-				nameToAltsToSkip[name].append(altName)
-			numToSkip += 1
-print(f"Found {numToSkip} alt-names to skip")
-
-print("Creating database tables")
-dbCon = sqlite3.connect(dbFile)
-dbCur = dbCon.cursor()
-dbCur.execute("CREATE TABLE names(name TEXT, alt_name TEXT, pref_alt INT, src TEXT, PRIMARY KEY(name, alt_name))")
-dbCur.execute("CREATE INDEX names_idx ON names(name)")
-dbCur.execute("CREATE INDEX names_alt_idx ON names(alt_name)")
-dbCur.execute("CREATE INDEX names_alt_idx_nc ON names(alt_name COLLATE NOCASE)")
-dbCur.execute("CREATE TABLE eol_ids(id INT PRIMARY KEY, name TEXT)")
-dbCur.execute("CREATE INDEX eol_name_idx ON eol_ids(name)")
-
-print("Associating nodes with names")
-usedPids = set()
-unresolvedNodeNames = set()
-dbCur2 = dbCon.cursor()
-def addToDb(nodeName, pidToUse):
-	" Adds page-ID-associated name data to a node in the database "
-	global dbCur, pidToPreferred
-	dbCur.execute("INSERT INTO eol_ids VALUES (?, ?)", (pidToUse, nodeName))
-	# Get alt-names
-	altNames = set()
-	for n in pidToNames[pidToUse]:
-		# Avoid alt-names with >3 words
-		if len(n.split(" ")) > 3:
-			continue
-		# Avoid alt-names that already name a node in the database
-		if dbCur.execute("SELECT name FROM nodes WHERE name = ?", (n,)).fetchone() != None:
-			continue
-		# Check for picked alt-name-to-skip
-		if nodeName in nameToAltsToSkip and n in nameToAltsToSkip[nodeName]:
-			print(f"Excluding alt-name {n} for node {nodeName}")
-			continue
-		#
-		altNames.add(n)
-	# Add alt-names to db
-	preferredName = pidToPreferred[pidToUse] if (pidToUse in pidToPreferred) else None
-	for n in altNames:
-		isPreferred = 1 if (n == preferredName) else 0
-		dbCur.execute("INSERT INTO names VALUES (?, ?, ?, 'eol')", (nodeName, n, isPreferred))
-print("Adding picked IDs")
-for (name, pid) in nameToPickedPid.items():
-	if pid != None:
-		addToDb(name, pid)
-		usedPids.add(pid)
-print("Associating nodes with canonical names")
-iterNum = 0
-for (nodeName,) in dbCur2.execute("SELECT name FROM nodes"):
-	iterNum += 1
-	if iterNum % 1e5 == 0:
-		print(f"At iteration {iterNum}")
-	if nodeName in nameToPickedPid:
-		continue
-	# Check for matching canonical name
-	if nodeName in canonicalNameToPids:
-		pidToUse = None
-		# Pick an associated page ID
-		for pid in canonicalNameToPids[nodeName]:
-			hasLowerPrio = pid not in pidToPreferred and pidToUse in pidToPreferred
-			hasHigherPrio = pid in pidToPreferred and pidToUse not in pidToPreferred
-			if hasLowerPrio:
-				continue
-			if pid not in usedPids and (pidToUse == None or pid < pidToUse or hasHigherPrio):
-				pidToUse = pid
-		if pidToUse != None:
-			addToDb(nodeName, pidToUse)
-			usedPids.add(pidToUse)
-	elif nodeName in nameToPids:
-		unresolvedNodeNames.add(nodeName)
-print("Associating leftover nodes with other names")
-iterNum = 0
-for nodeName in unresolvedNodeNames:
-	iterNum += 1
-	if iterNum % 100 == 0:
-		print(f"At iteration {iterNum}")
-	# Check for matching name
-	pidToUse = None
-	for pid in nameToPids[nodeName]:
-		# Pick an associated page ID
-		if pid not in usedPids and (pidToUse == None or pid < pidToUse):
-			pidToUse = pid
-	if pidToUse != None:
-		addToDb(nodeName, pidToUse)
-		usedPids.add(pidToUse)
-
-print("Closing database")
-dbCon.commit()
-dbCon.close()
diff --git a/backend/data/genImgs.py b/backend/data/genImgs.py
deleted file mode 100755
index ecca8e0..0000000
--- a/backend/data/genImgs.py
+++ /dev/null
@@ -1,191 +0,0 @@
-#!/usr/bin/python3
-
-import sys, os, subprocess
-import sqlite3, urllib.parse
-import signal
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Reads node IDs and image paths from a file, and possibly from a directory,
-and generates cropped/resized versions of those images into a directory,
-with names of the form 'nodeId1.jpg'. Also adds image metadata to the
-database.
-
-SIGINT can be used to stop, and the program can be re-run to continue
-processing. It uses already-existing database entries to decide what
-to skip.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-imgListFile = "imgList.txt"
-outDir = "img/"
-eolImgDb = "eol/imagesList.db"
-enwikiImgDb = "enwiki/imgData.db"
-pickedImgsDir = "pickedImgs/"
-pickedImgsFilename = "imgData.txt"
-dbFile = "data.db"
-IMG_OUT_SZ = 200
-genImgFiles = True # Usable for debugging
-
-if not os.path.exists(outDir):
-	os.mkdir(outDir)
-
-print("Opening databases")
-dbCon = sqlite3.connect(dbFile)
-dbCur = dbCon.cursor()
-eolCon = sqlite3.connect(eolImgDb)
-eolCur = eolCon.cursor()
-enwikiCon = sqlite3.connect(enwikiImgDb)
-enwikiCur = enwikiCon.cursor()
-print("Checking for picked-images")
-nodeToPickedImg = {}
-if os.path.exists(pickedImgsDir + pickedImgsFilename):
-	lineNum = 0
-	with open(pickedImgsDir + pickedImgsFilename) as file:
-		for line in file:
-			lineNum += 1
-			(filename, url, license, artist, credit) = line.rstrip().split("|")
-			nodeName = os.path.splitext(filename)[0] # Remove extension
-			(otolId,) = dbCur.execute("SELECT id FROM nodes WHERE name = ?", (nodeName,)).fetchone()
-			nodeToPickedImg[otolId] = {
-				"nodeName": nodeName, "id": lineNum,
-				"filename": filename, "url": url, "license": license, "artist": artist, "credit": credit,
-			}
-
-print("Checking for image tables")
-nodesDone = set()
-imgsDone = set()
-if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='node_imgs'").fetchone() == None:
-	# Add image tables if not present
-	dbCur.execute("CREATE TABLE node_imgs (name TEXT PRIMARY KEY, img_id INT, src TEXT)")
-	dbCur.execute("CREATE TABLE images" \
-		" (id INT, src TEXT, url TEXT, license TEXT, artist TEXT, credit TEXT, PRIMARY KEY (id, src))")
-else:
-	# Get existing image-associated nodes
-	for (otolId,) in dbCur.execute("SELECT nodes.id FROM node_imgs INNER JOIN nodes ON node_imgs.name = nodes.name"):
-		nodesDone.add(otolId)
-	# Get existing node-associated images
-	for (imgId, imgSrc) in dbCur.execute("SELECT id, src from images"):
-		imgsDone.add((imgId, imgSrc))
-	print(f"Found {len(nodesDone)} nodes and {len(imgsDone)} images to skip")
-
-# Set SIGINT handler
-interrupted = False
-def onSigint(sig, frame):
-	global interrupted
-	interrupted = True
-signal.signal(signal.SIGINT, onSigint)
-
-print("Iterating through input images")
-def quit():
-	print("Closing databases")
-	dbCon.commit()
-	dbCon.close()
-	eolCon.close()
-	enwikiCon.close()
-	sys.exit(0)
-def convertImage(imgPath, outPath):
-	print(f"Converting {imgPath} to {outPath}")
-	if os.path.exists(outPath):
-		print(f"ERROR: Output image already exists")
-		return False
-	try:
-		completedProcess = subprocess.run(
-			['npx', 'smartcrop-cli', '--width', str(IMG_OUT_SZ), '--height', str(IMG_OUT_SZ), imgPath, outPath],
-			stdout=subprocess.DEVNULL
-		)
-	except Exception as e:
-		print(f"ERROR: Exception while attempting to run smartcrop: {e}")
-		return False
-	if completedProcess.returncode != 0:
-		print(f"ERROR: smartcrop had exit status {completedProcess.returncode}")
-		return False
-	return True
-print("Processing picked-images")
-for (otolId, imgData) in nodeToPickedImg.items():
-	# Check for SIGINT event
-	if interrupted:
-		print("Exiting")
-		quit()
-	# Skip if already processed
-	if otolId in nodesDone:
-		continue
-	# Convert image
-	if genImgFiles:
-		success = convertImage(pickedImgsDir + imgData["filename"], outDir + otolId + ".jpg")
-		if not success:
-			quit()
-	else:
-		print(f"Processing {imgData['nodeName']}: {otolId}.jpg")
-	# Add entry to db
-	if (imgData["id"], "picked") not in imgsDone:
-		dbCur.execute("INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)",
-			(imgData["id"], "picked", imgData["url"], imgData["license"], imgData["artist"], imgData["credit"]))
-		imgsDone.add((imgData["id"], "picked"))
-	dbCur.execute("INSERT INTO node_imgs VALUES (?, ?, ?)", (imgData["nodeName"], imgData["id"], "picked"))
-	nodesDone.add(otolId)
-print("Processing images from eol and enwiki")
-iterNum = 0
-with open(imgListFile) as file:
-	for line in file:
-		iterNum += 1
-		# Check for SIGINT event
-		if interrupted:
-			print("Exiting")
-			break
-		# Skip lines without an image path
-		if line.find(" ") == -1:
-			continue
-		# Get filenames
-		(otolId, _, imgPath) = line.rstrip().partition(" ")
-		# Skip if already processed
-		if otolId in nodesDone:
-			continue
-		# Convert image
-		if genImgFiles:
-			success = convertImage(imgPath, outDir + otolId + ".jpg")
-			if not success:
-				break
-		else:
-			if iterNum % 1e4 == 0:
-				print(f"At iteration {iterNum}")
-		# Add entry to db
-		(nodeName,) = dbCur.execute("SELECT name FROM nodes WHERE id = ?", (otolId,)).fetchone()
-		fromEol = imgPath.startswith("eol/")
-		imgName = os.path.basename(os.path.normpath(imgPath)) # Get last path component
-		imgName = os.path.splitext(imgName)[0] # Remove extension
-		if fromEol:
-			eolId, _, contentId = imgName.partition(" ")
-			eolId, contentId = (int(eolId), int(contentId))
-			if (eolId, "eol") not in imgsDone:
-				query = "SELECT source_url, license, copyright_owner FROM images WHERE content_id = ?"
-				row = eolCur.execute(query, (contentId,)).fetchone()
-				if row == None:
-					print(f"ERROR: No image record for EOL ID {eolId}, content ID {contentId}")
-					break
-				(url, license, owner) = row
-				dbCur.execute("INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)",
-					(eolId, "eol", url, license, owner, ""))
-				imgsDone.add((eolId, "eol"))
-			dbCur.execute("INSERT INTO node_imgs VALUES (?, ?, ?)", (nodeName, eolId, "eol"))
-		else:
-			enwikiId = int(imgName)
-			if (enwikiId, "enwiki") not in imgsDone:
-				query = "SELECT name, license, artist, credit FROM" \
-					" page_imgs INNER JOIN imgs ON page_imgs.img_name = imgs.name" \
-					" WHERE page_imgs.page_id = ?"
-				row = enwikiCur.execute(query, (enwikiId,)).fetchone()
-				if row == None:
-					print(f"ERROR: No image record for enwiki ID {enwikiId}")
-					break
-				(name, license, artist, credit) = row
-				url = "https://en.wikipedia.org/wiki/File:" + urllib.parse.quote(name)
-				dbCur.execute("INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)",
-					(enwikiId, "enwiki", url, license, artist, credit))
-				imgsDone.add((enwikiId, "enwiki"))
-			dbCur.execute("INSERT INTO node_imgs VALUES (?, ?, ?)", (nodeName, enwikiId, "enwiki"))
-# Close dbs
-quit()
diff --git a/backend/data/genLinkedImgs.py b/backend/data/genLinkedImgs.py
deleted file mode 100755
index a8e1322..0000000
--- a/backend/data/genLinkedImgs.py
+++ /dev/null
@@ -1,125 +0,0 @@
-#!/usr/bin/python3
-
-import sys, re
-import sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Look for nodes without images in the database, and tries to
-associate them with images from their children.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-dbFile = "data.db"
-compoundNameRegex = re.compile(r"\[(.+) \+ (.+)]")
-upPropagateCompoundImgs = False
-
-print("Opening databases")
-dbCon = sqlite3.connect(dbFile)
-dbCur = dbCon.cursor()
-dbCur.execute("CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, otol_ids TEXT)")
-
-print("Getting nodes with images")
-resolvedNodes = {} # Will map node names to otol IDs with a usable image
-query = "SELECT nodes.name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name"
-for (name, otolId) in dbCur.execute(query):
-	resolvedNodes[name] = otolId
-print(f"Found {len(resolvedNodes)}")
-
-print("Iterating through nodes, trying to resolve images for ancestors")
-nodesToResolve = {} # Maps a node name to a list of objects that represent possible child images
-processedNodes = {} # Map a node name to an OTOL ID, representing a child node whose image is to be used
-parentToChosenTips = {} # used to prefer images from children with more tips
-iterNum = 0
-while len(resolvedNodes) > 0:
-	iterNum += 1
-	if iterNum % 1e3 == 0:
-		print(f"At iteration {iterNum}")
-	# Get next node
-	(nodeName, otolId) = resolvedNodes.popitem()
-	processedNodes[nodeName] = otolId
-	# Traverse upwards, resolving ancestors if able
-	while True:
-		# Get parent
-		row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (nodeName,)).fetchone()
-		if row == None or row[0] in processedNodes or row[0] in resolvedNodes:
-			break
-		parent = row[0]
-		# Get parent data
-		if parent not in nodesToResolve:
-			childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE parent = ?", (parent,))]
-			query = "SELECT name, tips FROM nodes WHERE name IN ({})".format(",".join(["?"] * len(childNames)))
-			childObjs = [{"name": row[0], "tips": row[1], "otolId": None} for row in dbCur.execute(query, childNames)]
-			childObjs.sort(key=lambda x: x["tips"], reverse=True)
-			nodesToResolve[parent] = childObjs
-		else:
-			childObjs = nodesToResolve[parent]
-		# Check if highest-tips child
-		if (childObjs[0]["name"] == nodeName):
-			# Resolve parent, and continue from it
-			dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (parent, otolId))
-			del nodesToResolve[parent]
-			processedNodes[parent] = otolId
-			parentToChosenTips[parent] = childObjs[0]["tips"]
-			nodeName = parent
-			continue
-		else:
-			# Mark child as a potential choice
-			childObj = next(c for c in childObjs if c["name"] == nodeName)
-			childObj["otolId"] = otolId
-			break
-	# When out of resolved nodes, resolve nodesToResolve nodes, possibly adding more nodes to resolve
-	if len(resolvedNodes) == 0:
-		for (name, childObjs) in nodesToResolve.items():
-			childObj = next(c for c in childObjs if c["otolId"] != None)
-			resolvedNodes[name] = childObj["otolId"]
-			parentToChosenTips[name] = childObj["tips"]
-			dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (name, childObj["otolId"]))
-		nodesToResolve.clear()
-
-print("Replacing linked-images for compound nodes")
-iterNum = 0
-for nodeName in processedNodes.keys():
-	iterNum += 1
-	if iterNum % 1e4 == 0:
-		print(f"At iteration {iterNum}")
-	#
-	match = compoundNameRegex.fullmatch(nodeName)
-	if match != None:
-		# Replace associated image with subname images
-		(subName1, subName2) = match.group(1,2)
-		otolIdPair = ["", ""]
-		if subName1 in processedNodes:
-			otolIdPair[0] = processedNodes[subName1]
-		if subName2 in processedNodes:
-			otolIdPair[1] = processedNodes[subName2]
-		# Use no image if both subimages not found
-		if otolIdPair[0] == "" and otolIdPair[1] == "":
-			dbCur.execute("DELETE FROM linked_imgs WHERE name = ?", (nodeName,))
-			continue
-		# Add to db
-		dbCur.execute("UPDATE linked_imgs SET otol_ids = ? WHERE name = ?",
-			(otolIdPair[0] + "," + otolIdPair[1], nodeName))
-		# Possibly repeat operation upon parent/ancestors
-		if upPropagateCompoundImgs:
-			while True:
-				# Get parent
-				row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (nodeName,)).fetchone()
-				if row != None:
-					parent = row[0]
-					# Check num tips
-					(numTips,) = dbCur.execute("SELECT tips from nodes WHERE name = ?", (nodeName,)).fetchone()
-					if parent in parentToChosenTips and parentToChosenTips[parent] <= numTips:
-						# Replace associated image
-						dbCur.execute("UPDATE linked_imgs SET otol_ids = ? WHERE name = ?",
-							(otolIdPair[0] + "," + otolIdPair[1], parent))
-						nodeName = parent
-						continue
-				break
-
-print("Closing databases")
-dbCon.commit()
-dbCon.close()
diff --git a/backend/data/genOtolData.py b/backend/data/genOtolData.py
deleted file mode 100755
index b5e0055..0000000
--- a/backend/data/genOtolData.py
+++ /dev/null
@@ -1,250 +0,0 @@
-#!/usr/bin/python3
-
-import sys, re, os
-import json, sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Reads files describing a tree-of-life from an 'Open Tree of Life' release,
-and stores tree information in a database.
-
-Reads a labelled_supertree_ottnames.tre file, which is assumed to have this format:
-    The tree-of-life is represented in Newick format, which looks like: (n1,n2,(n3,n4)n5)n6
-		The root node is named n6, and has children n1, n2, and n5.
-    Name examples include: Homo_sapiens_ott770315, mrcaott6ott22687, 'Oxalis san-miguelii ott5748753', 
-		'ott770315' and 'mrcaott6ott22687' are node IDs. The latter is for a 'compound node'.
-		The node with ID 'ott770315' will get the name 'homo sapiens'.
-		A compound node will get a name composed from it's sub-nodes (eg: [name1 + name2]).
-	It is possible for multiple nodes to have the same name.
-		In these cases, extra nodes will be named sequentially, as 'name1 [2]', 'name1 [3]', etc.
-Reads an annotations.json file, which is assumed to have this format:
-    Holds a JSON object, whose 'nodes' property maps node IDs to objects holding information about that node,
-    such as the properties 'supported_by' and 'conflicts_with', which list phylogenetic trees that
-	support/conflict with the node's placement.
-Reads from a picked-names file, if present, which specifies name and node ID pairs.
-	These help resolve cases where multiple nodes share the same name.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-treeFile = "otol/labelled_supertree_ottnames.tre" # Had about 2.5e9 nodes
-annFile = "otol/annotations.json"
-dbFile = "data.db"
-nodeMap = {} # Maps node IDs to node objects
-nameToFirstId = {} # Maps node names to first found ID (names might have multiple IDs)
-dupNameToIds = {} # Maps names of nodes with multiple IDs to those IDs
-pickedNamesFile = "pickedOtolNames.txt"
-
-class Node:
-	" Represents a tree-of-life node "
-	def __init__(self, name, childIds, parentId, tips, pSupport):
-		self.name = name
-		self.childIds = childIds
-		self.parentId = parentId
-		self.tips = tips
-		self.pSupport = pSupport
-
-print("Parsing tree file")
-# Read file
-data = None
-with open(treeFile) as file:
-	data = file.read()
-dataIdx = 0
-# Parse content
-iterNum = 0
-def parseNewick():
-	" Parses a node using 'data' and 'dataIdx', updates nodeMap accordingly, and returns the node's ID "
-	global data, dataIdx, iterNum
-	iterNum += 1
-	if iterNum % 1e5 == 0:
-		print(f"At iteration {iterNum}")
-	# Check for EOF
-	if dataIdx == len(data):
-		raise Exception(f"ERROR: Unexpected EOF at index {dataIdx}")
-	# Check for node
-	if data[dataIdx] == "(": # parse inner node
-		dataIdx += 1
-		childIds = []
-		while True:
-			# Read child
-			childId = parseNewick()
-			childIds.append(childId)
-			if (dataIdx == len(data)):
-				raise Exception(f"ERROR: Unexpected EOF at index {dataIdx}")
-			# Check for next child
-			if (data[dataIdx] == ","):
-				dataIdx += 1
-				continue
-			else:
-				# Get node name and id
-				dataIdx += 1 # Consume an expected ')'
-				name, id = parseNewickName()
-				updateNameMaps(name, id)
-				# Get child num-tips total
-				tips = 0
-				for childId in childIds:
-					tips += nodeMap[childId].tips
-				# Add node to nodeMap
-				nodeMap[id] = Node(name, childIds, None, tips, False)
-				# Update childrens' parent reference
-				for childId in childIds:
-					nodeMap[childId].parentId = id
-				return id
-	else: # Parse node name
-		name, id = parseNewickName()
-		updateNameMaps(name, id)
-		nodeMap[id] = Node(name, [], None, 1, False)
-		return id
-def parseNewickName():
-	" Parses a node name using 'data' and 'dataIdx', and returns a (name, id) pair "
-	global data, dataIdx
-	name = None
-	end = dataIdx
-	# Get name
-	if (end < len(data) and data[end] == "'"): # Check for quoted name
-		end += 1
-		inQuote = True
-		while end < len(data):
-			if (data[end] == "'"):
-				if end + 1 < len(data) and data[end + 1] == "'": # Account for '' as escaped-quote
-					end += 2
-					continue
-				else:
-					end += 1
-					inQuote = False
-					break
-			end += 1
-		if inQuote:
-			raise Exception(f"ERROR: Unexpected EOF at index {dataIdx}")
-		name = data[dataIdx:end]
-		dataIdx = end
-	else:
-		while end < len(data) and not re.match(r"[(),]", data[end]):
-			end += 1
-		if (end == dataIdx):
-			raise Exception(f"ERROR: Unexpected EOF at index {dataIdx}")
-		name = data[dataIdx:end].rstrip()
-		if end == len(data): # Ignore trailing input semicolon
-			name = name[:-1]
-		dataIdx = end
-	# Convert to (name, id)
-	name = name.lower()
-	if name.startswith("mrca"):
-		return (name, name)
-	elif name[0] == "'":
-		match = re.fullmatch(r"'([^\\\"]+) (ott\d+)'", name)
-		if match == None:
-			raise Exception(f"ERROR: invalid name \"{name}\"")
-		name = match.group(1).replace("''", "'")
-		return (name, match.group(2))
-	else:
-		match = re.fullmatch(r"([^\\\"]+)_(ott\d+)", name)
-		if match == None:
-			raise Exception(f"ERROR: invalid name \"{name}\"")
-		return (match.group(1).replace("_", " "), match.group(2))
-def updateNameMaps(name, id):
-	global nameToFirstId, dupNameToIds
-	if name not in nameToFirstId:
-		nameToFirstId[name] = id
-	else:
-		if name not in dupNameToIds:
-			dupNameToIds[name] = [nameToFirstId[name], id]
-		else:
-			dupNameToIds[name].append(id)
-rootId = parseNewick()
-
-print("Resolving duplicate names")
-# Read picked-names file
-nameToPickedId = {}
-if os.path.exists(pickedNamesFile):
-	with open(pickedNamesFile) as file:
-		for line in file:
-			(name, _, otolId) = line.rstrip().partition("|")
-			nameToPickedId[name] = otolId
-# Resolve duplicates
-for (dupName, ids) in dupNameToIds.items():
-	# Check for picked id
-	if dupName in nameToPickedId:
-		idToUse = nameToPickedId[dupName]
-	else:
-		# Get conflicting node with most tips
-		tipNums = [nodeMap[id].tips for id in ids]
-		maxIdx = tipNums.index(max(tipNums))
-		idToUse = ids[maxIdx]
-	# Adjust name of other conflicting nodes
-	counter = 2
-	for id in ids:
-		if id != idToUse:
-			nodeMap[id].name += f" [{counter}]"
-			counter += 1
-
-print("Changing mrca* names")
-def convertMrcaName(id):
-	node = nodeMap[id]
-	name = node.name
-	childIds = node.childIds
-	if len(childIds) < 2:
-		print(f"WARNING: MRCA node \"{name}\" has less than 2 children")
-		return
-	# Get 2 children with most tips
-	childTips = [nodeMap[id].tips for id in childIds]
-	maxIdx1 = childTips.index(max(childTips))
-	childTips[maxIdx1] = 0
-	maxIdx2 = childTips.index(max(childTips))
-	childId1 = childIds[maxIdx1]
-	childId2 = childIds[maxIdx2]
-	childName1 = nodeMap[childId1].name
-	childName2 = nodeMap[childId2].name
-	# Check for mrca* child names
-	if childName1.startswith("mrca"):
-		childName1 = convertMrcaName(childId1)
-	if childName2.startswith("mrca"):
-		childName2 = convertMrcaName(childId2)
-	# Check for composite names
-	match = re.fullmatch(r"\[(.+) \+ (.+)]", childName1)
-	if match != None:
-		childName1 = match.group(1)
-	match = re.fullmatch(r"\[(.+) \+ (.+)]", childName2)
-	if match != None:
-		childName2 = match.group(1)
-	# Create composite name
-	node.name = f"[{childName1} + {childName2}]"
-	return childName1
-for (id, node) in nodeMap.items():
-	if node.name.startswith("mrca"):
-		convertMrcaName(id)
-
-print("Parsing annotations file")
-# Read file
-data = None
-with open(annFile) as file:
-	data = file.read()
-obj = json.loads(data)
-nodeAnnsMap = obj["nodes"]
-# Find relevant annotations
-for (id, node) in nodeMap.items():
-	# Set has-support value using annotations
-	if id in nodeAnnsMap:
-		nodeAnns = nodeAnnsMap[id]
-		supportQty = len(nodeAnns["supported_by"]) if "supported_by" in nodeAnns else 0
-		conflictQty = len(nodeAnns["conflicts_with"]) if "conflicts_with" in nodeAnns else 0
-		node.pSupport = supportQty > 0 and conflictQty == 0
-
-print("Creating nodes and edges tables")
-dbCon = sqlite3.connect(dbFile)
-dbCur = dbCon.cursor()
-dbCur.execute("CREATE TABLE nodes (name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT)")
-dbCur.execute("CREATE INDEX nodes_idx_nc ON nodes(name COLLATE NOCASE)")
-dbCur.execute("CREATE TABLE edges (parent TEXT, child TEXT, p_support INT, PRIMARY KEY (parent, child))")
-dbCur.execute("CREATE INDEX edges_child_idx ON edges(child)")
-for (otolId, node) in nodeMap.items():
-	dbCur.execute("INSERT INTO nodes VALUES (?, ?, ?)", (node.name, otolId, node.tips))
-	for childId in node.childIds:
-		childNode = nodeMap[childId]
-		dbCur.execute("INSERT INTO edges VALUES (?, ?, ?)",
-			(node.name, childNode.name, 1 if childNode.pSupport else 0))
-print("Closing database")
-dbCon.commit()
-dbCon.close()
diff --git a/backend/data/genReducedTrees.py b/backend/data/genReducedTrees.py
deleted file mode 100755
index a921be4..0000000
--- a/backend/data/genReducedTrees.py
+++ /dev/null
@@ -1,329 +0,0 @@
-#!/usr/bin/python3
-
-import sys, os.path, re
-import json, sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]} [tree1]
-
-Creates reduced versions of the tree in the database:
-- A 'picked nodes' tree:
-    Created from a minimal set of node names read from a file,
-    possibly with some extra randmly-picked children.
-- An 'images only' tree:
-    Created by removing nodes without an image or presence in the
-    'picked' tree.
-- A 'weakly trimmed' tree:
-    Created by removing nodes that lack an image or description, or
-    presence in the 'picked' tree. And, for nodes with 'many' children,
-    removing some more, despite any node descriptions.
-
-If tree1 is specified, as 'picked', 'images', or 'trimmed', only that
-tree is generated.
-"""
-if len(sys.argv) > 2 or len(sys.argv) == 2 and re.fullmatch(r"picked|images|trimmed", sys.argv[1]) == None:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-tree = sys.argv[1] if len(sys.argv) > 1 else None
-dbFile = "data.db"
-pickedNodesFile = "pickedNodes.txt"
-COMP_NAME_REGEX = re.compile(r"\[.+ \+ .+]") # Used to recognise composite nodes
-
-class Node:
-	def __init__(self, id, children, parent, tips, pSupport):
-		self.id = id
-		self.children = children
-		self.parent = parent
-		self.tips = tips
-		self.pSupport = pSupport
-
-print("Opening database")
-dbCon = sqlite3.connect(dbFile)
-dbCur = dbCon.cursor()
-
-def genPickedNodeTree(dbCur, pickedNames, rootName):
-	global COMP_NAME_REGEX
-	PREF_NUM_CHILDREN = 3 # Include extra children up to this limit
-	nodeMap = {} # Maps node names to Nodes
-	print("Getting ancestors")
-	nodeMap = genNodeMap(dbCur, pickedNames, 100)
-	print(f"Result has {len(nodeMap)} nodes")
-	print("Removing composite nodes")
-	removedNames = removeCompositeNodes(nodeMap)
-	print(f"Result has {len(nodeMap)} nodes")
-	print("Removing 'collapsible' nodes")
-	temp = removeCollapsibleNodes(nodeMap, pickedNames)
-	removedNames.update(temp)
-	print(f"Result has {len(nodeMap)} nodes")
-	print("Adding some additional nearby children")
-	namesToAdd = []
-	iterNum = 0
-	for (name, node) in nodeMap.items():
-		iterNum += 1
-		if iterNum % 100 == 0:
-			print(f"At iteration {iterNum}")
-		#
-		numChildren = len(node.children)
-		if numChildren < PREF_NUM_CHILDREN:
-			children = [row[0] for row in dbCur.execute("SELECT child FROM edges where parent = ?", (name,))]
-			newChildren = []
-			for n in children:
-				if n in nodeMap or n in removedNames:
-					continue
-				if COMP_NAME_REGEX.fullmatch(n) != None:
-					continue
-				if dbCur.execute("SELECT name from node_imgs WHERE name = ?", (n,)).fetchone() == None and \
-					dbCur.execute("SELECT name from linked_imgs WHERE name = ?", (n,)).fetchone() == None:
-					continue
-				newChildren.append(n)
-			newChildNames = newChildren[:(PREF_NUM_CHILDREN - numChildren)]
-			node.children.extend(newChildNames)
-			namesToAdd.extend(newChildNames)
-	for name in namesToAdd:
-		parent, pSupport = dbCur.execute("SELECT parent, p_support from edges WHERE child = ?", (name,)).fetchone()
-		(id,) = dbCur.execute("SELECT id FROM nodes WHERE name = ?", (name,)).fetchone()
-		parent = None if parent == "" else parent
-		nodeMap[name] = Node(id, [], parent, 0, pSupport == 1)
-	print(f"Result has {len(nodeMap)} nodes")
-	print("Updating 'tips' values")
-	updateTips(rootName, nodeMap)
-	print("Creating table")
-	addTreeTables(nodeMap, dbCur, "p")
-def genImagesOnlyTree(dbCur, nodesWithImgOrPicked, pickedNames, rootName):
-	print("Getting ancestors")
-	nodeMap = genNodeMap(dbCur, nodesWithImgOrPicked, 1e4)
-	print(f"Result has {len(nodeMap)} nodes")
-	print("Removing composite nodes")
-	removeCompositeNodes(nodeMap)
-	print(f"Result has {len(nodeMap)} nodes")
-	print("Removing 'collapsible' nodes")
-	removeCollapsibleNodes(nodeMap, {})
-	print(f"Result has {len(nodeMap)} nodes")
-	print(f"Updating 'tips' values") # Needed for next trimming step
-	updateTips(rootName, nodeMap)
-	print(f"Trimming from nodes with 'many' children")
-	trimIfManyChildren(nodeMap, rootName, 300, pickedNames)
-	print(f"Result has {len(nodeMap)} nodes")
-	print(f"Updating 'tips' values")
-	updateTips(rootName, nodeMap)
-	print("Creating table")
-	addTreeTables(nodeMap, dbCur, "i")
-def genWeaklyTrimmedTree(dbCur, nodesWithImgDescOrPicked, nodesWithImgOrPicked, rootName):
-	print("Getting ancestors")
-	nodeMap = genNodeMap(dbCur, nodesWithImgDescOrPicked, 1e5)
-	print(f"Result has {len(nodeMap)} nodes")
-	print("Getting nodes to 'strongly keep'")
-	iterNum = 0
-	nodesFromImgOrPicked = set()
-	for name in nodesWithImgOrPicked:
-		iterNum += 1
-		if iterNum % 1e4 == 0:
-			print(f"At iteration {iterNum}")
-		#
-		while name != None:
-			if name not in nodesFromImgOrPicked:
-				nodesFromImgOrPicked.add(name)
-				name = nodeMap[name].parent
-			else:
-				break
-	print(f"Node set has {len(nodesFromImgOrPicked)} nodes")
-	print("Removing 'collapsible' nodes")
-	removeCollapsibleNodes(nodeMap, nodesWithImgDescOrPicked)
-	print(f"Result has {len(nodeMap)} nodes")
-	print(f"Updating 'tips' values") # Needed for next trimming step
-	updateTips(rootName, nodeMap)
-	print(f"Trimming from nodes with 'many' children")
-	trimIfManyChildren(nodeMap, rootName, 600, nodesFromImgOrPicked)
-	print(f"Result has {len(nodeMap)} nodes")
-	print(f"Updating 'tips' values")
-	updateTips(rootName, nodeMap)
-	print("Creating table")
-	addTreeTables(nodeMap, dbCur, "t")
-# Helper functions
-def genNodeMap(dbCur, nameSet, itersBeforePrint = 1):
-	" Returns a subtree that includes nodes in 'nameSet', as a name-to-Node map "
-	nodeMap = {}
-	iterNum = 0
-	for name in nameSet:
-		iterNum += 1
-		if iterNum % itersBeforePrint == 0:
-			print(f"At iteration {iterNum}")
-		#
-		prevName = None
-		while name != None:
-			if name not in nodeMap:
-				# Add node
-				(id, tips) = dbCur.execute("SELECT id, tips from nodes where name = ?", (name,)).fetchone()
-				row = dbCur.execute("SELECT parent, p_support from edges where child = ?", (name,)).fetchone()
-				parent = None if row == None or row[0] == "" else row[0]
-				pSupport = row == None or row[1] == 1
-				children = [] if prevName == None else [prevName]
-				nodeMap[name] = Node(id, children, parent, 0, pSupport)
-				# Iterate to parent
-				prevName = name
-				name = parent
-			else:
-				# Just add as child
-				if prevName != None:
-					nodeMap[name].children.append(prevName)
-				break
-	return nodeMap
-def removeCompositeNodes(nodeMap):
-	" Given a tree, removes composite-name nodes, and returns the removed nodes' names "
-	global COMP_NAME_REGEX
-	namesToRemove = set()
-	for (name, node) in nodeMap.items():
-		parent = node.parent
-		if parent != None and COMP_NAME_REGEX.fullmatch(name) != None:
-			# Connect children to parent
-			nodeMap[parent].children.remove(name)
-			nodeMap[parent].children.extend(node.children)
-			for n in node.children:
-				nodeMap[n].parent = parent
-				nodeMap[n].pSupport &= node.pSupport
-			# Remember for removal
-			namesToRemove.add(name)
-	for name in namesToRemove:
-		del nodeMap[name]
-	return namesToRemove
-def removeCollapsibleNodes(nodeMap, nodesToKeep = {}):
-	""" Given a tree, removes single-child parents, then only-childs,
-		with given exceptions, and returns the set of removed nodes' names """
-	namesToRemove = set()
-	# Remove single-child parents
-	for (name, node) in nodeMap.items():
-		if len(node.children) == 1 and node.parent != None and name not in nodesToKeep:
-			# Connect parent and children
-			parent = node.parent
-			child = node.children[0]
-			nodeMap[parent].children.remove(name)
-			nodeMap[parent].children.append(child)
-			nodeMap[child].parent = parent
-			nodeMap[child].pSupport &= node.pSupport
-			# Remember for removal
-			namesToRemove.add(name)
-	for name in namesToRemove:
-		del nodeMap[name]
-	# Remove only-childs (not redundant because 'nodesToKeep' can cause single-child parents to be kept)
-	namesToRemove.clear()
-	for (name, node) in nodeMap.items():
-		isOnlyChild = node.parent != None and len(nodeMap[node.parent].children) == 1
-		if isOnlyChild and name not in nodesToKeep:
-			# Connect parent and children
-			parent = node.parent
-			nodeMap[parent].children = node.children
-			for n in node.children:
-				nodeMap[n].parent = parent
-				nodeMap[n].pSupport &= node.pSupport
-			# Remember for removal
-			namesToRemove.add(name)
-	for name in namesToRemove:
-		del nodeMap[name]
-	#
-	return namesToRemove
-def trimIfManyChildren(nodeMap, rootName, childThreshold, nodesToKeep = {}):
-	namesToRemove = set()
-	def findTrimmables(nodeName):
-		nonlocal nodeMap, nodesToKeep
-		node = nodeMap[nodeName]
-		if len(node.children) > childThreshold:
-			numToTrim = len(node.children) - childThreshold
-			# Try removing nodes, preferring those with less tips
-			candidatesToTrim = [n for n in node.children if n not in nodesToKeep]
-			childToTips = {n: nodeMap[n].tips for n in candidatesToTrim}
-			candidatesToTrim.sort(key=lambda n: childToTips[n], reverse=True)
-			childrenToRemove = set(candidatesToTrim[-numToTrim:])
-			node.children = [n for n in node.children if n not in childrenToRemove]
-			# Mark nodes for deletion
-			for n in childrenToRemove:
-				markForRemoval(n)
-		# Recurse on children
-		for n in node.children:
-			findTrimmables(n)
-	def markForRemoval(nodeName):
-		nonlocal nodeMap, namesToRemove
-		namesToRemove.add(nodeName)
-		for child in nodeMap[nodeName].children:
-			markForRemoval(child)
-	findTrimmables(rootName)
-	for nodeName in namesToRemove:
-		del nodeMap[nodeName]
-def updateTips(nodeName, nodeMap):
-	" Updates the 'tips' values for a node and it's descendants, returning the node's new 'tips' value "
-	node = nodeMap[nodeName]
-	tips = sum([updateTips(childName, nodeMap) for childName in node.children])
-	tips = max(1, tips)
-	node.tips = tips
-	return tips
-def addTreeTables(nodeMap, dbCur, suffix):
-	" Adds a tree to the database, as tables nodes_X and edges_X, where X is the given suffix "
-	nodesTbl = f"nodes_{suffix}"
-	edgesTbl = f"edges_{suffix}"
-	dbCur.execute(f"CREATE TABLE {nodesTbl} (name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT)")
-	dbCur.execute(f"CREATE INDEX {nodesTbl}_idx_nc ON {nodesTbl}(name COLLATE NOCASE)")
-	dbCur.execute(f"CREATE TABLE {edgesTbl} (parent TEXT, child TEXT, p_support INT, PRIMARY KEY (parent, child))")
-	dbCur.execute(f"CREATE INDEX {edgesTbl}_child_idx ON {edgesTbl}(child)")
-	for (name, node) in nodeMap.items():
-		dbCur.execute(f"INSERT INTO {nodesTbl} VALUES (?, ?, ?)", (name, node.id, node.tips))
-		for childName in node.children:
-			pSupport = 1 if nodeMap[childName].pSupport else 0
-			dbCur.execute(f"INSERT INTO {edgesTbl} VALUES (?, ?, ?)", (name, childName, pSupport))
-
-print(f"Finding root node")
-query = "SELECT name FROM nodes LEFT JOIN edges ON nodes.name = edges.child WHERE edges.parent IS NULL LIMIT 1"
-(rootName,) = dbCur.execute(query).fetchone()
-print(f"Found \"{rootName}\"")
-
-print('=== Getting picked-nodes ===')
-pickedNames = set()
-pickedTreeExists = False
-if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='nodes_p'").fetchone() == None:
-	print(f"Reading from {pickedNodesFile}")
-	with open(pickedNodesFile) as file:
-		for line in file:
-			name = line.rstrip()
-			row = dbCur.execute("SELECT name from nodes WHERE name = ?", (name,)).fetchone()
-			if row == None:
-				row = dbCur.execute("SELECT name from names WHERE alt_name = ?", (name,)).fetchone()
-			if row != None:
-				pickedNames.add(row[0])
-	if len(pickedNames) == 0:
-		raise Exception("ERROR: No picked names found")
-else:
-	pickedTreeExists = True
-	print("Picked-node tree already exists")
-	if tree == 'picked':
-		sys.exit()
-	for (name,) in dbCur.execute("SELECT name FROM nodes_p"):
-		pickedNames.add(name)
-print(f"Found {len(pickedNames)} names")
-
-if (tree == 'picked' or tree == None) and not pickedTreeExists:
-	print("=== Generating picked-nodes tree ===")
-	genPickedNodeTree(dbCur, pickedNames, rootName)
-if tree != 'picked':
-	print("=== Finding 'non-low significance' nodes ===")
-	nodesWithImgOrPicked = set()
-	nodesWithImgDescOrPicked = set()
-	print("Finding nodes with descs")
-	for (name,) in dbCur.execute("SELECT name FROM wiki_ids"): # Can assume the wiki_id has a desc
-		nodesWithImgDescOrPicked.add(name)
-	print("Finding nodes with images")
-	for (name,) in dbCur.execute("SELECT name FROM node_imgs"):
-		nodesWithImgDescOrPicked.add(name)
-		nodesWithImgOrPicked.add(name)
-	print("Adding picked nodes")
-	for name in pickedNames:
-		nodesWithImgDescOrPicked.add(name)
-		nodesWithImgOrPicked.add(name)
-	if tree == 'images' or tree == None:
-		print("=== Generating images-only tree ===")
-		genImagesOnlyTree(dbCur, nodesWithImgOrPicked, pickedNames, rootName)
-	if tree == 'trimmed' or tree == None:
-		print("=== Generating weakly-trimmed tree ===")
-		genWeaklyTrimmedTree(dbCur, nodesWithImgDescOrPicked, nodesWithImgOrPicked, rootName)
-
-print("Closing database")
-dbCon.commit()
-dbCon.close()
diff --git a/backend/data/otol/README.md b/backend/data/otol/README.md
deleted file mode 100644
index 4be2fd2..0000000
--- a/backend/data/otol/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-Files
-=====
--   opentree13.4tree.tgz <br>
-    Obtained from <https://tree.opentreeoflife.org/about/synthesis-release/v13.4>.
-    Contains tree data from the [Open Tree of Life](https://tree.opentreeoflife.org/about/open-tree-of-life).
--   labelled\_supertree\_ottnames.tre <br>
-    Extracted from the .tgz file. Describes the structure of the tree.
--   annotations.json
-    Extracted from the .tgz file. Contains additional attributes of tree
-    nodes. Used for finding out which nodes have 'phylogenetic support'.
diff --git a/backend/data/pickedImgs/README.md b/backend/data/pickedImgs/README.md
deleted file mode 100644
index dfe192b..0000000
--- a/backend/data/pickedImgs/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-This directory holds additional image files to use for tree-of-life nodes,
-on top of those from EOL and Wikipedia.
-
-Possible Files
-==============
--   (Image files)
--   imgData.txt <br>
-    Contains lines with the format `filename|url|license|artist|credit`.
-    The filename should consist of a node name, with an image extension.
-    Other fields correspond to those in the `images` table (see ../README.md).
diff --git a/backend/data/reviewImgsToGen.py b/backend/data/reviewImgsToGen.py
deleted file mode 100755
index de592f5..0000000
--- a/backend/data/reviewImgsToGen.py
+++ /dev/null
@@ -1,225 +0,0 @@
-#!/usr/bin/python3
-
-import sys, re, os, time
-import sqlite3
-import tkinter as tki
-from tkinter import ttk
-import PIL
-from PIL import ImageTk, Image, ImageOps
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Provides a GUI that displays, for each node in the database, associated
-images from EOL and Wikipedia, and allows choosing which to use. Writes
-choice data to a text file with lines of the form 'otolId1 imgPath1', or
-'otolId1', where no path indicates a choice of no image.
-
-The program can be closed, and run again to continue from the last choice.
-The program looks for an existing output file to determine what choices
-have already been made.
-"""
-if len(sys.argv) > 1:
-	print(usageInfo, file=sys.stderr)
-	sys.exit(1)
-
-eolImgDir = "eol/imgs/"
-enwikiImgDir = "enwiki/imgs/"
-dbFile = "data.db"
-outFile = "imgList.txt"
-IMG_DISPLAY_SZ = 400
-PLACEHOLDER_IMG = Image.new("RGB", (IMG_DISPLAY_SZ, IMG_DISPLAY_SZ), (88, 28, 135))
-onlyReviewPairs = True
-
-print("Opening database")
-dbCon = sqlite3.connect(dbFile)
-dbCur = dbCon.cursor()
-
-nodeToImgs = {} # Maps otol-ids to arrays of image paths
-print("Iterating through images from EOL")
-if os.path.exists(eolImgDir):
-	for filename in os.listdir(eolImgDir):
-		# Get associated EOL ID
-		eolId, _, _ = filename.partition(" ")
-		query = "SELECT nodes.id FROM nodes INNER JOIN eol_ids ON nodes.name = eol_ids.name WHERE eol_ids.id = ?"
-		# Get associated node IDs
-		found = False
-		for (otolId,) in dbCur.execute(query, (int(eolId),)):
-			if otolId not in nodeToImgs:
-				nodeToImgs[otolId] = []
-			nodeToImgs[otolId].append(eolImgDir + filename)
-			found = True
-		if not found:
-			print(f"WARNING: No node found for {eolImgDir}{filename}")
-print(f"Result: {len(nodeToImgs)} nodes with images")
-print("Iterating through images from Wikipedia")
-if os.path.exists(enwikiImgDir):
-	for filename in os.listdir(enwikiImgDir):
-		# Get associated page ID
-		(wikiId, _, _) = filename.partition(".")
-		# Get associated node IDs
-		query = "SELECT nodes.id FROM nodes INNER JOIN wiki_ids ON nodes.name = wiki_ids.name WHERE wiki_ids.id = ?"
-		found = False
-		for (otolId,) in dbCur.execute(query, (int(wikiId),)):
-			if otolId not in nodeToImgs:
-				nodeToImgs[otolId] = []
-			nodeToImgs[otolId].append(enwikiImgDir + filename)
-			found = True
-		if not found:
-			print(f"WARNING: No node found for {enwikiImgDir}{filename}")
-print(f"Result: {len(nodeToImgs)} nodes with images")
-print("Filtering out already-made image choices")
-oldSz = len(nodeToImgs)
-if os.path.exists(outFile):
-	with open(outFile) as file:
-		for line in file:
-			line = line.rstrip()
-			if " " in line:
-				line = line[:line.find(" ")]
-			del nodeToImgs[line]
-print(f"Filtered out {oldSz - len(nodeToImgs)} entries")
-
-class ImgReviewer:
-	" Provides the GUI for reviewing images "
-	def __init__(self, root, nodeToImgs):
-		self.root = root
-		root.title("Image Reviewer")
-		# Setup main frame
-		mainFrame = ttk.Frame(root, padding="5 5 5 5")
-		mainFrame.grid(column=0, row=0, sticky=(tki.N, tki.W, tki.E, tki.S))
-		root.columnconfigure(0, weight=1)
-		root.rowconfigure(0, weight=1)
-		# Set up images-to-be-reviewed frames
-		self.eolImg = ImageTk.PhotoImage(PLACEHOLDER_IMG)
-		self.enwikiImg = ImageTk.PhotoImage(PLACEHOLDER_IMG)
-		self.labels = []
-		for i in (0, 1):
-			frame = ttk.Frame(mainFrame, width=IMG_DISPLAY_SZ, height=IMG_DISPLAY_SZ)
-			frame.grid(column=i, row=0)
-			label = ttk.Label(frame, image=self.eolImg if i == 0 else self.enwikiImg)
-			label.grid(column=0, row=0)
-			self.labels.append(label)
-		# Add padding
-		for child in mainFrame.winfo_children():
-			child.grid_configure(padx=5, pady=5)
-		# Add keyboard bindings
-		root.bind("<q>", self.quit)
-		root.bind("<Key-j>", lambda evt: self.accept(0))
-		root.bind("<Key-k>", lambda evt: self.accept(1))
-		root.bind("<Key-l>", lambda evt: self.reject())
-		# Set fields
-		self.nodeImgsList = list(nodeToImgs.items())
-		self.listIdx = -1
-		self.otolId = None
-		self.eolImgPath = None
-		self.enwikiImgPath = None
-		self.numReviewed = 0
-		self.startTime = time.time()
-		# Initialise images to review
-		self.getNextImgs()
-	def getNextImgs(self):
-		" Updates display with new images to review, or ends program "
-		# Get next image paths
-		while True:
-			self.listIdx += 1
-			if self.listIdx == len(self.nodeImgsList):
-				print("No more images to review. Exiting program.")
-				self.quit()
-				return
-			self.otolId, imgPaths = self.nodeImgsList[self.listIdx]
-			# Potentially skip user choice
-			if onlyReviewPairs and len(imgPaths) == 1:
-				with open(outFile, 'a') as file:
-					file.write(f"{self.otolId} {imgPaths[0]}\n")
-				continue
-			break
-		# Update displayed images
-		self.eolImgPath = self.enwikiImgPath = None
-		imageOpenError = False
-		for imgPath in imgPaths:
-			img = None
-			try:
-				img = Image.open(imgPath)
-				img = ImageOps.exif_transpose(img)
-			except PIL.UnidentifiedImageError:
-				print(f"UnidentifiedImageError for {imgPath}")
-				imageOpenError = True
-				continue
-			if imgPath.startswith("eol/"):
-				self.eolImgPath = imgPath
-				self.eolImg = ImageTk.PhotoImage(self.resizeImgForDisplay(img))
-			elif imgPath.startswith("enwiki/"):
-				self.enwikiImgPath = imgPath
-				self.enwikiImg = ImageTk.PhotoImage(self.resizeImgForDisplay(img))
-			else:
-				print(f"Unexpected image path {imgPath}")
-				self.quit()
-				return
-		# Re-iterate if all image paths invalid
-		if self.eolImgPath == None and self.enwikiImgPath == None:
-			if imageOpenError:
-				self.reject()
-			self.getNextImgs()
-			return
-		# Add placeholder images
-		if self.eolImgPath == None:
-			self.eolImg = ImageTk.PhotoImage(self.resizeImgForDisplay(PLACEHOLDER_IMG))
-		elif self.enwikiImgPath == None:
-			self.enwikiImg = ImageTk.PhotoImage(self.resizeImgForDisplay(PLACEHOLDER_IMG))
-		# Update image-frames
-		self.labels[0].config(image=self.eolImg)
-		self.labels[1].config(image=self.enwikiImg)
-		# Update title
-		title = f"Images for otol ID {self.otolId}"
-		query = "SELECT names.alt_name FROM" \
-			" nodes INNER JOIN names ON nodes.name = names.name" \
-			" WHERE nodes.id = ? and pref_alt = 1"
-		row = dbCur.execute(query, (self.otolId,)).fetchone()
-		if row != None:
-			title += f", aka {row[0]}"
-		title += f" ({self.listIdx + 1} out of {len(self.nodeImgsList)})"
-		self.root.title(title)
-	def accept(self, imgIdx):
-		" React to a user selecting an image "
-		imgPath = self.eolImgPath if imgIdx == 0 else self.enwikiImgPath
-		if imgPath == None:
-			print("Invalid selection")
-			return
-		with open(outFile, 'a') as file:
-			file.write(f"{self.otolId} {imgPath}\n")
-		self.numReviewed += 1
-		self.getNextImgs()
-	def reject(self):
-		" React to a user rejecting all images of a set "
-		with open(outFile, 'a') as file:
-			file.write(f"{self.otolId}\n")
-		self.numReviewed += 1
-		self.getNextImgs()
-	def quit(self, e = None):
-		global dbCon
-		print(f"Number reviewed: {self.numReviewed}")
-		timeElapsed = time.time() - self.startTime
-		print(f"Time elapsed: {timeElapsed:.2f} seconds")
-		if self.numReviewed > 0:
-			print(f"Avg time per review: {timeElapsed/self.numReviewed:.2f} seconds")
-		dbCon.close()
-		self.root.destroy()
-	def resizeImgForDisplay(self, img):
-		" Returns a copy of an image, shrunk to fit it's frame (keeps aspect ratio), and with a background "
-		if max(img.width, img.height) > IMG_DISPLAY_SZ:
-			if (img.width > img.height):
-				newHeight = int(img.height * IMG_DISPLAY_SZ/img.width)
-				img = img.resize((IMG_DISPLAY_SZ, newHeight))
-			else:
-				newWidth = int(img.width * IMG_DISPLAY_SZ / img.height)
-				img = img.resize((newWidth, IMG_DISPLAY_SZ))
-		bgImg = PLACEHOLDER_IMG.copy()
-		bgImg.paste(img, box=(
-			int((IMG_DISPLAY_SZ - img.width) / 2),
-			int((IMG_DISPLAY_SZ - img.height) / 2)))
-		return bgImg
-# Create GUI and defer control
-print("Starting GUI")
-root = tki.Tk()
-ImgReviewer(root, nodeToImgs)
-root.mainloop()
diff --git a/backend/server.py b/backend/server.py
index a00ab7f..5e0b80f 100755
--- a/backend/server.py
+++ b/backend/server.py
@@ -1,18 +1,38 @@
 #!/usr/bin/python3
 
-import sys
-from wsgiref.simple_server import make_server
+import sys, os
+from wsgiref import simple_server, util
+import mimetypes
 from tilo import application
 
 usageInfo = f"""
 Usage: {sys.argv[0]}
 
-Runs a basic dev server that serves a WSGI script
+Runs a basic dev server that serves a WSGI script and image files
 """
 if len(sys.argv) > 1:
 	print(usageInfo, file=sys.stderr)
 	sys.exit(1)
 
-with make_server('', 8000, application) as httpd:
+def wrappingApp(environ, start_response):
+	""" WSGI app that passes /data/ requests to the tilo script, and serves files under /tolData/img/ """
+	urlPath = environ["PATH_INFO"]
+	if urlPath.startswith("/data/"):
+		return application(environ, start_response) # Run WSGI script
+	if urlPath.startswith("/tolData/img/"):
+		# Serve image file (path is normalised so '..' segments can't reach files outside the image directory)
+		imgDir = os.path.join(os.getcwd(), "tolData", "img")
+		imgPath = os.path.normpath(os.path.join(os.getcwd(), urlPath[1:]))
+		if imgPath.startswith(imgDir + os.sep) and os.path.isfile(imgPath):
+			# guess_type() gives None for unknown extensions, but WSGI header values must be strings
+			imgType = mimetypes.guess_type(imgPath)[0] or "application/octet-stream"
+			start_response("200 OK", [("Content-type", imgType)])
+			return util.FileWrapper(open(imgPath, "rb"))
+		start_response("404 Not Found", [("Content-type", "text/plain")])
+		return [b"No image found"]
+	start_response("404 Not Found", [("Content-type", "text/plain")])
+	return [b"Unrecognised path"]
+
+with simple_server.make_server('', 8000, wrappingApp) as httpd:
     print("Serving HTTP on port 8000...")
     httpd.serve_forever()
diff --git a/backend/tilo.py b/backend/tilo.py
index f2a177e..7b0f8aa 100755
--- a/backend/tilo.py
+++ b/backend/tilo.py
@@ -5,7 +5,7 @@ import urllib.parse
 import sqlite3
 import gzip, jsonpickle
 
-dbFile = "data/data.db"
+dbFile = "tolData/data.db"
 DEFAULT_SUGG_LIM = 5
 MAX_SUGG_LIM = 50
 ROOT_NAME = "cellular organisms"
diff --git a/backend/tolData/README.md b/backend/tolData/README.md
new file mode 100644
index 0000000..ba64114
--- /dev/null
+++ b/backend/tolData/README.md
@@ -0,0 +1,152 @@
+This directory holds files used to generate data.db, which contains tree-of-life data.
+
+# Tables
+## Tree Structure data
+-   `nodes` <br>
+    Format: `name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT` <br>
+    Represents a tree-of-life node. `tips` represents the number of no-child descendants.
+-   `edges` <br>
+    Format: `parent TEXT, child TEXT, p_support INT, PRIMARY KEY (parent, child)` <br>
+    `p_support` is 1 if the edge has 'phylogenetic support', and 0 otherwise
+## Node name data
+-   `eol_ids` <br>
+    Format: `id INT PRIMARY KEY, name TEXT` <br>
+    Associates an EOL ID with a node's name.
+-   `names` <br>
+    Format: `name TEXT, alt_name TEXT, pref_alt INT, src TEXT, PRIMARY KEY(name, alt_name)` <br>
+    Associates a node with alternative names.
+    `pref_alt` is 1 if the alt-name is the most 'preferred' one.
+    `src` indicates the dataset the alt-name was obtained from (can be 'eol', 'enwiki', or 'picked').
+## Node description data
+-   `wiki_ids` <br>
+    Format: `name TEXT PRIMARY KEY, id INT, redirected INT` <br>
+    Associates a node with a wikipedia page ID.
+    `redirected` is 1 if the node was associated with a different page that redirected to this one.
+-   `descs` <br>
+    Format: `wiki_id INT PRIMARY KEY, desc TEXT, from_dbp INT` <br>
+    Associates a wikipedia page ID with a short-description.
+    `from_dbp` is 1 if the description was obtained from DBpedia, and 0 otherwise.
+## Node image data
+-   `node_imgs` <br>
+    Format: `name TEXT PRIMARY KEY, img_id INT, src TEXT` <br>
+    Associates a node with an image.
+-   `images` <br>
+    Format: `id INT, src TEXT, url TEXT, license TEXT, artist TEXT, credit TEXT, PRIMARY KEY (id, src)` <br>
+    Represents an image, identified by a source ('eol', 'enwiki', or 'picked'), and a source-specific ID.
+-   `linked_imgs` <br>
+    Format: `name TEXT PRIMARY KEY, otol_ids TEXT` <br>
+    Associates a node with an image from another node.
+    `otol_ids` can be an otol ID, or two comma-separated otol IDs or empty strings.
+        The latter is used for compound nodes.
+## Reduced tree data
+-   `nodes_t`, `nodes_i`, `nodes_p` <br>
+    These are like `nodes`, but describe the nodes for various reduced trees.
+-   `edges_t`, `edges_i`, `edges_p` <br>
+    Like `edges` but for reduced trees.
+
+# Generating the Database
+
+For the most part, these steps should be done in order.
+
+As a warning, the whole process takes a lot of time and file space. The tree will probably
+have about 2.5 million nodes. Downloading the images takes several days, and occupies over
+200 GB. And if you want good data, you'll need to do some manual review, which can take weeks.
+
+## Environment
+The scripts are written in python and bash.
+Some of the python scripts require third-party packages:
+-   jsonpickle: For encoding class objects as JSON.
+-   requests: For downloading data.
+-   PIL: For image processing.
+-   tkinter: For providing a basic GUI to review images.
+-   mwxml, mwparserfromhell: For parsing Wikipedia dumps.
+
+## Generate tree structure data
+1.  Obtain files in otol/, as specified in its README.
+2.  Run genOtolData.py, which creates data.db, and adds the `nodes` and `edges` tables,
+    using data in otol/. It also uses these files, if they exist:
+    -   pickedOtolNames.txt: Has lines of the form `name1|otolId1`. Some nodes in the
+        tree may have the same name (eg: Pholidota can refer to pangolins or orchids).
+        Normally, such nodes will get the names 'name1', 'name1 [2]', 'name1 [3]', etc.
+        This file can be used to manually specify which node should be named 'name1'.
+
+## Generate node name data
+1.  Obtain 'name data files' in eol/, as specified in its README.
+2.  Run genEolNameData.py, which adds the `names` and `eol_ids` tables, using data in
+    eol/ and the `nodes` table. It also uses these files, if they exist:
+    -   pickedEolIds.txt: Has lines of the form `nodeName1|eolId1` or `nodeName1|`.
+        Specifies node names that should have a particular EOL ID, or no ID.
+        Quite a few taxons have ambiguous names, and may need manual correction.
+        For example, Viola may resolve to a taxon of butterflies or of plants.
+    -   pickedEolAltsToSkip.txt: Has lines of the form `nodeName1|altName1`.
+        Specifies that a node's alt-name set should exclude altName1.
+
+## Generate node description data
+### Get data from DBpedia
+1.  Obtain files in dbpedia/, as specified in its README.
+2.  Run genDbpData.py, which adds the `wiki_ids` and `descs` tables, using data in
+    dbpedia/ and the `nodes` table. It also uses these files, if they exist:
+    -   pickedEnwikiNamesToSkip.txt: Each line holds the name of a node for which
+        no description should be obtained. Many node names have a same-name
+        wikipedia page that describes something different (eg: Osiris).
+    -   pickedDbpLabels.txt: Has lines of the form `nodeName1|label1`.
+        Specifies node names that should have a particular associated page label.
+### Get data from Wikipedia
+1.  Obtain 'description database files' in enwiki/, as specified in its README.
+2.  Run genEnwikiDescData.py, which adds to the `wiki_ids` and `descs` tables,
+    using data in enwiki/ and the `nodes` table.
+    It also uses these files, if they exist:
+    -   pickedEnwikiNamesToSkip.txt: Same as with genDbpData.py.
+    -   pickedEnwikiLabels.txt: Similar to pickedDbpLabels.txt.
+
+## Generate node image data
+### Get images from EOL
+1.  Obtain 'image metadata files' in eol/, as specified in its README.
+2.  In eol/, run downloadImgs.py, which downloads images (possibly multiple per node),
+    into eol/imgsForReview, using data in eol/, as well as the `eol_ids` table.
+3.  In eol/, run reviewImgs.py, which interactively displays the downloaded images for
+    each node, providing the choice of which to use, moving them to eol/imgs/.
+    Uses `names` and `eol_ids` to display extra info.
+### Get images from Wikipedia
+1.  In enwiki/, run genImgData.py, which looks for wikipedia image names for each node,
+    using the `wiki_ids` table, and stores them in a database.
+2.  In enwiki/, run downloadImgLicenseInfo.py, which downloads licensing information for
+    those images, using wikipedia's online API.
+3.  In enwiki/, run downloadImgs.py, which downloads 'permissively-licensed'
+    images into enwiki/imgs/.
+### Merge the image sets
+1.  Run reviewImgsToGen.py, which displays images from eol/imgs/ and enwiki/imgs/,
+    and enables choosing, for each node, which image should be used, if any,
+    and outputs choice information into imgList.txt. Uses the `nodes`,
+    `eol_ids`, and `wiki_ids` tables (as well as `names` to display extra info).
+2.  Run genImgs.py, which creates cropped/resized images in img/, from files listed in
+    imgList.txt and located in eol/ and enwiki/, and creates the `node_imgs` and
+    `images` tables. If pickedImgs/ is present, images within it are also used. <br>
+    The outputs might need to be manually created/adjusted:
+    -   An input image might have no output produced, possibly due to
+        data incompatibilities, memory limits, etc. A few input image files
+        might actually be html files, containing a 'file not found' page.
+    -   An input x.gif might produce x-1.jpg, x-2.jpg, etc, instead of x.jpg.
+    -   An input image might produce output with unexpected dimensions.
+        This seems to happen when the image is very large, and triggers a
+        decompression bomb warning.
+    The result might have as many as 150k images, with about 2/3 of them
+    being from wikipedia.
+### Add more image associations
+1.  Run genLinkedImgs.py, which tries to associate nodes without images to
+    images of its children. Adds the `linked_imgs` table, and uses the
+    `nodes`, `edges`, and `node_imgs` tables.
+
+## Do some post-processing
+1.  Run genEnwikiNameData.py, which adds more entries to the `names` table,
+    using data in enwiki/, and the `names` and `wiki_ids` tables.
+2.  Optionally run addPickedNames.py, which allows adding manually-selected name data to
+    the `names` table, as specified in pickedNames.txt.
+    -   pickedNames.txt: Has lines of the form `nodeName1|altName1|prefAlt1`.
+        These correspond to entries in the `names` table. `prefAlt` should be 1 or 0.
+        A line like `name1|name1|1` causes a node to have no preferred alt-name.
+3.  Run genReducedTrees.py, which generates multiple reduced versions of the tree,
+    adding the `nodes_*` and `edges_*` tables, using `nodes` and `names`. Reads from
+    pickedNodes.txt, which lists names of nodes that must be included (1 per line).
+    The original tree isn't used for web-queries, as some nodes would have over 
+    10k children, which can take a while to render (took over a minute in testing).
diff --git a/backend/tolData/addPickedNames.py b/backend/tolData/addPickedNames.py
new file mode 100755
index 0000000..d56a0cb
--- /dev/null
+++ b/backend/tolData/addPickedNames.py
@@ -0,0 +1,57 @@
+#!/usr/bin/python3
+
+import sys
+import sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads alt-name data from a file, and adds it to the database's 'names' table.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+dbFile = "data.db"
+pickedNamesFile = "pickedNames.txt"
+
+print("Opening database")
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+
+print("Iterating through picked-names file")
+with open(pickedNamesFile) as file:
+	for line in file:
+		# Get record data
+		nodeName, altName, prefAlt = line.lower().rstrip().split("|")
+		prefAlt = int(prefAlt)
+		# Check whether there exists a node with the name
+		row = dbCur.execute("SELECT name from nodes where name = ?", (nodeName,)).fetchone()
+		if row == None:
+			print(f"ERROR: No node with name \"{nodeName}\" exists")
+			break
+		# Remove any existing preferred-alt status
+		if prefAlt == 1:
+			query = "SELECT name, alt_name FROM names WHERE name = ? AND pref_alt = 1"
+			row = dbCur.execute(query, (nodeName,)).fetchone()
+			if row != None and row[1] != altName:
+				print(f"Removing pref-alt status from alt-name {row[1]} for {nodeName}")
+				dbCur.execute("UPDATE names SET pref_alt = 0 WHERE name = ? AND alt_name = ?", row)
+		# Check for an existing record
+		if nodeName == altName:
+			continue
+		query = "SELECT name, alt_name, pref_alt FROM names WHERE name = ? AND alt_name = ?"
+		row = dbCur.execute(query, (nodeName, altName)).fetchone()
+		if row == None:
+			print(f"Adding record for alt-name {altName} for {nodeName}")
+			dbCur.execute("INSERT INTO names VALUES (?, ?, ?, 'picked')", (nodeName, altName, prefAlt))
+		else:
+			# Update existing record
+			if row[2] != prefAlt:
+				print(f"Updating record for alt-name {altName} for {nodeName}")
+				dbCur.execute("UPDATE names SET pref_alt = ?, src = 'picked' WHERE name = ? AND alt_name = ?",
+					(prefAlt, nodeName, altName))
+
+print("Closing database")
+dbCon.commit()
+dbCon.close()
diff --git a/backend/tolData/dbpedia/README.md b/backend/tolData/dbpedia/README.md
new file mode 100644
index 0000000..8a08f20
--- /dev/null
+++ b/backend/tolData/dbpedia/README.md
@@ -0,0 +1,29 @@
+This directory holds files obtained from/using [DBpedia](https://www.dbpedia.org).
+
+# Downloaded Files
+-   `labels_lang=en.ttl.bz2` <br>
+    Obtained via https://databus.dbpedia.org/dbpedia/collections/latest-core.
+    Downloaded from <https://databus.dbpedia.org/dbpedia/generic/labels/2022.03.01/labels_lang=en.ttl.bz2>.
+-   `page_lang=en_ids.ttl.bz2` <br>
+    Downloaded from <https://databus.dbpedia.org/dbpedia/generic/page/2022.03.01/page_lang=en_ids.ttl.bz2>
+-   `redirects_lang=en_transitive.ttl.bz2` <br>
+    Downloaded from <https://databus.dbpedia.org/dbpedia/generic/redirects/2022.03.01/redirects_lang=en_transitive.ttl.bz2>.
+-   `disambiguations_lang=en.ttl.bz2` <br>
+    Downloaded from <https://databus.dbpedia.org/dbpedia/generic/disambiguations/2022.03.01/disambiguations_lang=en.ttl.bz2>.
+-   `instance-types_lang=en_specific.ttl.bz2` <br>
+    Downloaded from <https://databus.dbpedia.org/dbpedia/mappings/instance-types/2022.03.01/instance-types_lang=en_specific.ttl.bz2>.
+-   `short-abstracts_lang=en.ttl.bz2` <br>
+    Downloaded from <https://databus.dbpedia.org/vehnem/text/short-abstracts/2021.05.01/short-abstracts_lang=en.ttl.bz2>.
+
+# Other Files
+-   genDescData.py <br>
+    Used to generate a database representing data from the ttl files.
+-   descData.db <br>
+    Generated by genDescData.py. <br>
+    Tables: <br>
+    -   `labels`:          `iri TEXT PRIMARY KEY, label TEXT`
+    -   `ids`:             `iri TEXT PRIMARY KEY, id INT`
+    -   `redirects`:       `iri TEXT PRIMARY KEY, target TEXT`
+    -   `disambiguations`: `iri TEXT PRIMARY KEY`
+    -   `types`:           `iri TEXT, type TEXT`
+    -   `abstracts`:       `iri TEXT PRIMARY KEY, abstract TEXT`
diff --git a/backend/tolData/dbpedia/genDescData.py b/backend/tolData/dbpedia/genDescData.py
new file mode 100755
index 0000000..d9e8a80
--- /dev/null
+++ b/backend/tolData/dbpedia/genDescData.py
@@ -0,0 +1,130 @@
+#!/usr/bin/python3
+
+import sys, re
+import bz2, sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Adds DBpedia labels/types/abstracts/etc data into a database.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+labelsFile = "labels_lang=en.ttl.bz2" # Had about 16e6 entries
+idsFile = "page_lang=en_ids.ttl.bz2"
+redirectsFile = "redirects_lang=en_transitive.ttl.bz2"
+disambigFile = "disambiguations_lang=en.ttl.bz2"
+typesFile = "instance-types_lang=en_specific.ttl.bz2"
+abstractsFile = "short-abstracts_lang=en.ttl.bz2"
+dbFile = "descData.db"
+# In testing, this script took a few hours to run, and generated about 10GB
+
+print("Creating database")
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+
+print("Reading/storing label data")
+dbCur.execute("CREATE TABLE labels (iri TEXT PRIMARY KEY, label TEXT)")
+dbCur.execute("CREATE INDEX labels_idx ON labels(label)")
+dbCur.execute("CREATE INDEX labels_idx_nc ON labels(label COLLATE NOCASE)")
+labelLineRegex = re.compile(r'<([^>]+)> <[^>]+> "((?:[^"]|\\")+)"@en \.\n')
+lineNum = 0
+with bz2.open(labelsFile, mode='rt') as file:
+	for line in file:
+		lineNum += 1
+		if lineNum % 1e5 == 0:
+			print(f"At line {lineNum}")
+		#
+		match = labelLineRegex.fullmatch(line)
+		if match == None:
+			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
+		dbCur.execute("INSERT INTO labels VALUES (?, ?)", (match.group(1), match.group(2)))
+
+print("Reading/storing wiki page ids")
+dbCur.execute("CREATE TABLE ids (iri TEXT PRIMARY KEY, id INT)")
+idLineRegex = re.compile(r'<([^>]+)> <[^>]+> "(\d+)".*\n')
+lineNum = 0
+with bz2.open(idsFile, mode='rt') as file:
+	for line in file:
+		lineNum += 1
+		if lineNum % 1e5 == 0:
+			print(f"At line {lineNum}")
+		#
+		match = idLineRegex.fullmatch(line)
+		if match == None:
+			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
+		try:
+			dbCur.execute("INSERT INTO ids VALUES (?, ?)", (match.group(1), int(match.group(2))))
+		except sqlite3.IntegrityError as e:
+			# Accounts for certain lines that have the same IRI
+			print(f"WARNING: Failed to add entry with IRI \"{match.group(1)}\": {e}")
+
+print("Reading/storing redirection data")
+dbCur.execute("CREATE TABLE redirects (iri TEXT PRIMARY KEY, target TEXT)")
+redirLineRegex = re.compile(r'<([^>]+)> <[^>]+> <([^>]+)> \.\n')
+lineNum = 0
+with bz2.open(redirectsFile, mode='rt') as file:
+	for line in file:
+		lineNum += 1
+		if lineNum % 1e5 == 0:
+			print(f"At line {lineNum}")
+		#
+		match = redirLineRegex.fullmatch(line)
+		if match == None:
+			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
+		dbCur.execute("INSERT INTO redirects VALUES (?, ?)", (match.group(1), match.group(2)))
+
+print("Reading/storing diambiguation-page data")
+dbCur.execute("CREATE TABLE disambiguations (iri TEXT PRIMARY KEY)")
+disambigLineRegex = redirLineRegex
+lineNum = 0
+with bz2.open(disambigFile, mode='rt') as file:
+	for line in file:
+		lineNum += 1
+		if lineNum % 1e5 == 0:
+			print(f"At line {lineNum}")
+		#
+		match = disambigLineRegex.fullmatch(line)
+		if match == None:
+			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
+		dbCur.execute("INSERT OR IGNORE INTO disambiguations VALUES (?)", (match.group(1),))
+
+print("Reading/storing instance-type data")
+dbCur.execute("CREATE TABLE types (iri TEXT, type TEXT)")
+dbCur.execute("CREATE INDEX types_iri_idx ON types(iri)")
+typeLineRegex = redirLineRegex
+lineNum = 0
+with bz2.open(typesFile, mode='rt') as file:
+	for line in file:
+		lineNum += 1
+		if lineNum % 1e5 == 0:
+			print(f"At line {lineNum}")
+		#
+		match = typeLineRegex.fullmatch(line)
+		if match == None:
+			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
+		dbCur.execute("INSERT INTO types VALUES (?, ?)", (match.group(1), match.group(2)))
+
+print("Reading/storing abstracts")
+dbCur.execute("CREATE TABLE abstracts (iri TEXT PRIMARY KEY, abstract TEXT)")
+descLineRegex = labelLineRegex
+lineNum = 0
+with bz2.open(abstractsFile, mode='rt') as file:
+	for line in file:
+		lineNum += 1
+		if lineNum % 1e5 == 0:
+			print(f"At line {lineNum}")
+		#
+		if line[0] == "#":
+			continue
+		match = descLineRegex.fullmatch(line)
+		if match == None:
+			raise Exception(f"ERROR: Line {lineNum} has unexpected format")
+		dbCur.execute("INSERT INTO abstracts VALUES (?, ?)",
+			(match.group(1), match.group(2).replace(r'\"', '"')))
+
+print("Closing database")
+dbCon.commit()
+dbCon.close()
diff --git a/backend/tolData/enwiki/README.md b/backend/tolData/enwiki/README.md
new file mode 100644
index 0000000..90d16c7
--- /dev/null
+++ b/backend/tolData/enwiki/README.md
@@ -0,0 +1,52 @@
+This directory holds files obtained from/using [English Wikipedia](https://en.wikipedia.org/wiki/Main_Page).
+
+# Downloaded Files
+-   enwiki-20220501-pages-articles-multistream.xml.bz2 <br>
+    Obtained via <https://dumps.wikimedia.org/backup-index.html> (site suggests downloading from a mirror).
+    Contains text content and metadata for pages in enwiki.
+    Some file content and format information was available from
+        <https://meta.wikimedia.org/wiki/Data_dumps/What%27s_available_for_download>.
+-   enwiki-20220501-pages-articles-multistream-index.txt.bz2 <br>
+    Obtained like above. Holds lines of the form offset1:pageId1:title1,
+    providing, for each page, an offset into the dump file of a chunk of
+    100 pages that includes it.
+
+# Generated Dump-Index Files
+-   genDumpIndexDb.py <br>
+    Creates an sqlite-database version of the enwiki-dump index file.
+-   dumpIndex.db <br>
+    Generated by genDumpIndexDb.py. <br>
+    Tables: <br>
+    -   `offsets`: `title TEXT PRIMARY KEY, id INT UNIQUE, offset INT, next_offset INT`
+
+# Description Database Files
+-   genDescData.py <br>
+    Reads through pages in the dump file, and adds short-description info to a database.
+-   descData.db <br>
+    Generated by genDescData.py. <br>
+    Tables: <br>
+    -   `pages`:     `id INT PRIMARY KEY, title TEXT UNIQUE`
+    -   `redirects`: `id INT PRIMARY KEY, target TEXT`
+    -   `descs`:     `id INT PRIMARY KEY, desc TEXT`
+
+# Image Database Files
+-   genImgData.py <br>
+    Used to find infobox image names for page IDs, storing them into a database.
+-   downloadImgLicenseInfo.py <br>
+    Used to download licensing metadata for image names, via wikipedia's online API, storing them into a database.
+-   imgData.db <br>
+    Used to hold metadata about infobox images for a set of pageIDs.
+    Generated using genImgData.py and downloadImgLicenseInfo.py. <br>
+    Tables: <br>
+    -   `page_imgs`: `page_id INT PRIMARY KEY, img_name TEXT` <br>
+        `img_name` may be null, which means 'none found', and is used to avoid re-processing page-ids.
+    -   `imgs`: `name TEXT PRIMARY KEY, license TEXT, artist TEXT, credit TEXT, restrictions TEXT, url TEXT` <br>
+        Might lack some matches for `img_name` in `page_imgs`, due to licensing info unavailability.
+-   downloadImgs.py <br>
+    Used to download image files into imgs/.
+
+# Other Files
+-   lookupPage.py <br>
+    Running `lookupPage.py title1` looks in the dump for a page with a given title,
+    and prints the contents to stdout. Uses dumpIndex.db.
+
diff --git a/backend/tolData/enwiki/downloadImgLicenseInfo.py b/backend/tolData/enwiki/downloadImgLicenseInfo.py
new file mode 100755
index 0000000..399922e
--- /dev/null
+++ b/backend/tolData/enwiki/downloadImgLicenseInfo.py
@@ -0,0 +1,150 @@
+#!/usr/bin/python3
+
+import sys, re
+import sqlite3, urllib.parse, html
+import requests
+import time, signal
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads image names from a database, and uses enwiki's online API to obtain
+licensing information for them, adding the info to the database.
+
+SIGINT causes the program to finish an ongoing download and exit.
+The program can be re-run to continue downloading, and looks
+at already-processed names to decide what to skip.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+imgDb = "imgData.db"
+apiUrl = "https://en.wikipedia.org/w/api.php"
+userAgent = "terryt.dev (terry06890@gmail.com)"
+batchSz = 50 # Max 50
+tagRegex = re.compile(r"<[^<]+>")
+whitespaceRegex = re.compile(r"\s+")
+
+print("Opening database")
+dbCon = sqlite3.connect(imgDb)
+dbCur = dbCon.cursor()
+dbCur2 = dbCon.cursor()
+print("Checking for table")
+if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='imgs'").fetchone() == None:
+	dbCur.execute("CREATE TABLE imgs(" \
+		"name TEXT PRIMARY KEY, license TEXT, artist TEXT, credit TEXT, restrictions TEXT, url TEXT)")
+
+print("Reading image names")
+imgNames = set()
+for (imgName,) in dbCur.execute("SELECT DISTINCT img_name FROM page_imgs WHERE img_name NOT NULL"):
+	imgNames.add(imgName)
+print(f"Found {len(imgNames)}")
+
+print("Checking for already-processed images")
+oldSz = len(imgNames)
+for (imgName,) in dbCur.execute("SELECT name FROM imgs"):
+	imgNames.discard(imgName)
+print(f"Found {oldSz - len(imgNames)}")
+
+# Set SIGINT handler
+interrupted = False
+oldHandler = None
+def onSigint(sig, frame):
+	global interrupted
+	interrupted = True
+	signal.signal(signal.SIGINT, oldHandler)
+oldHandler = signal.signal(signal.SIGINT, onSigint)
+
+print("Iterating through image names")
+imgNames = list(imgNames)
+iterNum = 0
+for i in range(0, len(imgNames), batchSz):
+	iterNum += 1
+	if iterNum % 1 == 0:
+		print(f"At iteration {iterNum} (after {(iterNum - 1) * batchSz} images)")
+	if interrupted:
+		print(f"Exiting loop at iteration {iterNum}")
+		break
+	# Get batch
+	imgBatch = imgNames[i:i+batchSz]
+	imgBatch = ["File:" + x for x in imgBatch]
+	# Make request
+	headers = {
+		"user-agent": userAgent,
+		"accept-encoding": "gzip",
+	}
+	params = {
+		"action": "query",
+		"format": "json",
+		"prop": "imageinfo",
+		"iiprop": "extmetadata|url",
+		"maxlag": "5",
+		"titles": "|".join(imgBatch),
+		"iiextmetadatafilter": "Artist|Credit|LicenseShortName|Restrictions",
+	}
+	responseObj = None
+	try:
+		response = requests.get(apiUrl, params=params, headers=headers)
+		responseObj = response.json()
+	except Exception as e:
+		print(f"ERROR: Exception while downloading info: {e}")
+		print(f"\tImage batch: " + "|".join(imgBatch))
+		continue
+	# Parse response-object
+	if "query" not in responseObj or "pages" not in responseObj["query"]:
+		print("WARNING: Response object for doesn't have page data")
+		print("\tImage batch: " + "|".join(imgBatch))
+		if "error" in responseObj:
+			errorCode = responseObj["error"]["code"]
+			print(f"\tError code: {errorCode}")
+			if errorCode == "maxlag":
+				time.sleep(5)
+		continue
+	pages = responseObj["query"]["pages"]
+	normalisedToInput = {}
+	if "normalized" in responseObj["query"]:
+		for entry in responseObj["query"]["normalized"]:
+			normalisedToInput[entry["to"]] = entry["from"]
+	for (_, page) in pages.items():
+		# Some fields // More info at https://www.mediawiki.org/wiki/Extension:CommonsMetadata#Returned_data
+			# LicenseShortName: short human-readable license name, apparently more reliable than 'License',
+			# Artist: author name (might contain complex html, multiple authors, etc)
+			# Credit: 'source'
+				# For image-map-like images, can be quite large/complex html, crediting each sub-image
+				# May be <a href="text1">text2</a>, where the text2 might be non-indicative
+			# Restrictions: specifies non-copyright legal restrictions
+		title = page["title"]
+		if title in normalisedToInput:
+			title = normalisedToInput[title]
+		title = title[5:] # Remove 'File:'
+		if title not in imgNames:
+			print(f"WARNING: Got title \"{title}\" not in image-name list")
+			continue
+		if "imageinfo" not in page:
+			print(f"WARNING: No imageinfo section for page \"{title}\"")
+			continue
+		metadata = page["imageinfo"][0]["extmetadata"]
+		url = page["imageinfo"][0]["url"]
+		license = metadata['LicenseShortName']['value'] if 'LicenseShortName' in metadata else None
+		artist = metadata['Artist']['value'] if 'Artist' in metadata else None
+		credit = metadata['Credit']['value'] if 'Credit' in metadata else None
+		restrictions = metadata['Restrictions']['value'] if 'Restrictions' in metadata else None
+		# Remove markup
+		if artist != None:
+			artist = tagRegex.sub(" ", artist)
+			artist = whitespaceRegex.sub(" ", artist)
+			artist = html.unescape(artist)
+			artist = urllib.parse.unquote(artist)
+		if credit != None:
+			credit = tagRegex.sub(" ", credit)
+			credit = whitespaceRegex.sub(" ", credit)
+			credit = html.unescape(credit)
+			credit = urllib.parse.unquote(credit)
+		# Add to db
+		dbCur2.execute("INSERT INTO imgs VALUES (?, ?, ?, ?, ?, ?)",
+			(title, license, artist, credit, restrictions, url))
+
+print("Closing database")
+dbCon.commit()
+dbCon.close()
diff --git a/backend/tolData/enwiki/downloadImgs.py b/backend/tolData/enwiki/downloadImgs.py
new file mode 100755
index 0000000..8fb605f
--- /dev/null
+++ b/backend/tolData/enwiki/downloadImgs.py
@@ -0,0 +1,91 @@
+#!/usr/bin/python3
+
+import sys, re, os
+import sqlite3
+import urllib.parse, requests
+import time, signal
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Downloads images from URLs in an image database, into an output directory,
+with names of the form 'pageId1.ext1'.
+
+SIGINT causes the program to finish an ongoing download and exit.
+The program can be re-run to continue downloading, and looks
+in the output directory do decide what to skip.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+imgDb = "imgData.db" # About 130k image names
+outDir = "imgs"
+licenseRegex = re.compile(r"cc0|cc([ -]by)?([ -]sa)?([ -][1234]\.[05])?( \w\w\w?)?", flags=re.IGNORECASE)
+# In testing, this downloaded about 100k images, over several days
+
+if not os.path.exists(outDir):
+	os.mkdir(outDir)
+print("Checking for already-downloaded images")
+fileList = os.listdir(outDir)
+pageIdsDone = set()
+for filename in fileList:
+	(basename, extension) = os.path.splitext(filename)
+	pageIdsDone.add(int(basename))
+print(f"Found {len(pageIdsDone)}")
+
+# Set SIGINT handler
+interrupted = False
+oldHandler = None
+def onSigint(sig, frame):
+	global interrupted
+	interrupted = True
+	signal.signal(signal.SIGINT, oldHandler)
+oldHandler = signal.signal(signal.SIGINT, onSigint)
+
+print("Opening database")
+dbCon = sqlite3.connect(imgDb)
+dbCur = dbCon.cursor()
+print("Starting downloads")
+iterNum = 0
+query = "SELECT page_id, license, artist, credit, restrictions, url FROM" \
+	" imgs INNER JOIN page_imgs ON imgs.name = page_imgs.img_name"
+for (pageId, license, artist, credit, restrictions, url) in dbCur.execute(query):
+	if pageId in pageIdsDone:
+		continue
+	if interrupted:
+		print(f"Exiting loop")
+		break
+	# Check for problematic attributes
+	if license == None or licenseRegex.fullmatch(license) == None:
+		continue
+	if artist == None or artist == "" or len(artist) > 100 or re.match(r"(\d\. )?File:", artist) != None:
+		continue
+	if credit == None or len(credit) > 300 or re.match(r"File:", credit) != None:
+		continue
+	if restrictions != None and restrictions != "":
+		continue
+	# Download image
+	iterNum += 1
+	print(f"Iteration {iterNum}: Downloading for page-id {pageId}")
+	urlParts = urllib.parse.urlparse(url)
+	extension = os.path.splitext(urlParts.path)[1]
+	if len(extension) <= 1:
+		print(f"WARNING: No filename extension found in URL {url}")
+		sys.exit(1)
+	outFile = f"{outDir}/{pageId}{extension}"
+	headers = {
+		"user-agent": "terryt.dev (terry06890@gmail.com)",
+		"accept-encoding": "gzip",
+	}
+	try:
+		response = requests.get(url, headers=headers)
+		with open(outFile, 'wb') as file:
+			file.write(response.content)
+		time.sleep(1)
+			# https://en.wikipedia.org/wiki/Wikipedia:Database_download says to "throttle self to 1 cache miss per sec"
+			# It's unclear how to properly check for cache misses, so this just aims for 1 per sec
+	except Exception as e:
+		print(f"Error while downloading to {outFile}: {e}")
+print("Closing database")
+dbCon.close()
diff --git a/backend/tolData/enwiki/genDescData.py b/backend/tolData/enwiki/genDescData.py
new file mode 100755
index 0000000..b0ca272
--- /dev/null
+++ b/backend/tolData/enwiki/genDescData.py
@@ -0,0 +1,127 @@
+#!/usr/bin/python3
+
+import sys, os, re
+import bz2
+import html, mwxml, mwparserfromhell
+import sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads through the wiki dump, and attempts to
+parse short-descriptions, and add them to a database.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2" # Had about 22e6 pages
+enwikiDb = "descData.db"
+# In testing, this script took over 10 hours to run, and generated about 5GB
+
+descLineRegex = re.compile("^ *[A-Z'\"]")
+embeddedHtmlRegex = re.compile(r"<[^<]+/>|<!--[^<]+-->|<[^</]+>([^<]*|[^<]*<[^<]+>[^<]*)</[^<]+>|<[^<]+$")
+	# Recognises a self-closing HTML tag, a tag with 0 children, tag with 1 child with 0 children, or unclosed tag
+convertTemplateRegex = re.compile(r"{{convert\|(\d[^|]*)\|(?:(to|-)\|(\d[^|]*)\|)?([a-z][^|}]*)[^}]*}}")
+def convertTemplateReplace(match):
+	" Returns plain text for a convertTemplateRegex match: 'N unit', or 'N to M unit' / 'N - M unit' for a range "
+	(firstVal, separator, secondVal, unit) = match.group(1, 2, 3, 4)
+	rangePart = "" if separator == None else f" {separator} {secondVal}"
+	return f"{firstVal}{rangePart} {unit}"
+parensGroupRegex = re.compile(r" \([^()]*\)")
+leftoverBraceRegex = re.compile(r"(?:{\||{{).*")
+
+def parseDesc(text):
+	# Finds the first line matching descLineRegex outside {{...}}, [[...]], and block-html-comment constructs,
+		# then accumulates lines until a blank one, and returns the joined text with markup removed (or None).
+	# Some cases not accounted for include: disambiguation pages, abstracts with sentences split-across-lines,
+		# nested embedded html, 'content significant' embedded-html, and markup not removable with mwparserfromhell.
+	lines = []
+	openBraceCount = 0
+	openBracketCount = 0
+	inComment = False
+	skip = False # Set when the current line was found to be part of a construct to skip
+	for line in text.splitlines():
+		line = line.strip()
+		if len(lines) == 0: # Still looking for the first description line
+			if len(line) > 0:
+				if openBraceCount > 0 or line[0] == "{":
+					openBraceCount += line.count("{")
+					openBraceCount -= line.count("}")
+					skip = True
+				if openBracketCount > 0 or line[0] == "[":
+					openBracketCount += line.count("[")
+					openBracketCount -= line.count("]")
+					skip = True
+				if inComment or line.find("<!--") != -1:
+					if line.find("-->") != -1:
+						if inComment:
+							inComment = False
+							skip = True
+					else:
+						inComment = True
+						skip = True
+				if skip:
+					skip = False
+					continue
+				if line[-1] == ":": # Seems to help avoid disambiguation pages
+					return None
+				if descLineRegex.match(line) != None:
+					lines.append(line)
+		else: # Accumulating description lines until a blank one
+			if len(line) == 0:
+				return removeMarkup(" ".join(lines))
+			lines.append(line)
+	if len(lines) > 0:
+		return removeMarkup(" ".join(lines))
+	return None
+def removeMarkup(content):
+	" Returns content with embedded html, {{convert}} templates, wikitext markup, and ' (...)' groups removed "
+	withoutHtml = embeddedHtmlRegex.sub("", content)
+	withConversions = convertTemplateRegex.sub(convertTemplateReplace, withoutHtml)
+	plainText = mwparserfromhell.parse(withConversions).strip_code() # Remove wikitext markup
+	withoutParens = parensGroupRegex.sub("", plainText)
+	return leftoverBraceRegex.sub("", withoutParens) # Drops text from a leftover '{|' or '{{' to the end of its line
+def convertTitle(title):
+	return html.unescape(title).replace("_", " ")
+
+print("Creating database")
+if os.path.exists(enwikiDb):
+	raise Exception(f"ERROR: Existing {enwikiDb}")
+dbCon = sqlite3.connect(enwikiDb)
+dbCur = dbCon.cursor()
+dbCur.execute("CREATE TABLE pages (id INT PRIMARY KEY, title TEXT UNIQUE)")
+dbCur.execute("CREATE INDEX pages_title_idx ON pages(title COLLATE NOCASE)")
+dbCur.execute("CREATE TABLE redirects (id INT PRIMARY KEY, target TEXT)")
+dbCur.execute("CREATE INDEX redirects_idx ON redirects(target)")
+dbCur.execute("CREATE TABLE descs (id INT PRIMARY KEY, desc TEXT)")
+
+print("Iterating through dump file")
+with bz2.open(dumpFile, mode='rt') as file:
+	dump = mwxml.Dump.from_file(file)
+	# No page-count cutoff: every page in the dump gets processed
+		# (the database is expected to cover the whole dump, which had about 22e6 pages)
+	pageNum = 0
+	for page in dump:
+		pageNum += 1
+		if pageNum % 1e4 == 0:
+			print(f"At page {pageNum}")
+		# Parse page
+		if page.namespace == 0:
+			try:
+				dbCur.execute("INSERT INTO pages VALUES (?, ?)", (page.id, convertTitle(page.title)))
+			except sqlite3.IntegrityError as e:
+				# Accounts for certain pages that have the same title
+				print(f"Failed to add page with title \"{page.title}\": {e}", file=sys.stderr)
+				continue
+			if page.redirect != None:
+				dbCur.execute("INSERT INTO redirects VALUES (?, ?)", (page.id, convertTitle(page.redirect)))
+			else:
+				revision = next(page)
+				desc = parseDesc(revision.text)
+				if desc != None:
+					dbCur.execute("INSERT INTO descs VALUES (?, ?)", (page.id, desc))
+
+print("Closing database")
+dbCon.commit()
+dbCon.close()
diff --git a/backend/tolData/enwiki/genDumpIndexDb.py b/backend/tolData/enwiki/genDumpIndexDb.py
new file mode 100755
index 0000000..3955885
--- /dev/null
+++ b/backend/tolData/enwiki/genDumpIndexDb.py
@@ -0,0 +1,58 @@
+#!/usr/bin/python3
+
+import sys, os, re
+import bz2
+import sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Adds data from the wiki dump index-file into a database.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+indexFile = "enwiki-20220501-pages-articles-multistream-index.txt.bz2" # Had about 22e6 lines
+indexDb = "dumpIndex.db"
+
+if os.path.exists(indexDb):
+	raise Exception(f"ERROR: Existing {indexDb}")
+print("Creating database")
+dbCon = sqlite3.connect(indexDb)
+dbCur = dbCon.cursor()
+dbCur.execute("CREATE TABLE offsets (title TEXT PRIMARY KEY, id INT UNIQUE, offset INT, next_offset INT)")
+
+print("Iterating through index file")
+lineRegex = re.compile(r"([^:]+):([^:]+):(.*)")
+lastOffset = 0
+lineNum = 0
+entriesToAdd = []
+with bz2.open(indexFile, mode='rt') as file:
+	for line in file:
+		lineNum += 1
+		if lineNum % 1e5 == 0:
+			print(f"At line {lineNum}")
+		#
+		match = lineRegex.fullmatch(line.rstrip())
+		(offset, pageId, title) = match.group(1,2,3)
+		offset = int(offset)
+		if offset > lastOffset:
+			for (t, p) in entriesToAdd:
+				try:
+					dbCur.execute("INSERT INTO offsets VALUES (?, ?, ?, ?)", (t, p, lastOffset, offset))
+				except sqlite3.IntegrityError as e:
+					# Accounts for certain entries in the file that have the same title
+					print(f"Failed on title \"{t}\": {e}", file=sys.stderr)
+			entriesToAdd = []
+			lastOffset = offset
+		entriesToAdd.append([title, pageId])
+for (title, pageId) in entriesToAdd: # Entries of the last chunk, which has no next offset (stored as -1)
+	try:
+		dbCur.execute("INSERT INTO offsets VALUES (?, ?, ?, ?)", (title, pageId, lastOffset, -1))
+	except sqlite3.IntegrityError as e:
+		print(f"Failed on title \"{title}\": {e}", file=sys.stderr)
+
+print("Closing database")
+dbCon.commit()
+dbCon.close()
diff --git a/backend/tolData/enwiki/genImgData.py b/backend/tolData/enwiki/genImgData.py
new file mode 100755
index 0000000..dedfe14
--- /dev/null
+++ b/backend/tolData/enwiki/genImgData.py
@@ -0,0 +1,190 @@
+#!/usr/bin/python3
+
+import sys, re
+import bz2, html, urllib.parse
+import sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+For some set of page IDs, looks up their content in the wiki dump,
+and tries to parse infobox image names, storing them into a database.
+
+The program can be re-run with an updated set of page IDs, and
+will skip already-processed page IDs.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+def getInputPageIds():
+	" Returns the set of page IDs held in the wiki_ids table of ../data.db "
+	idsDbCon = sqlite3.connect("../data.db")
+	idsDbCur = idsDbCon.cursor()
+	# Each row is a 1-tuple holding an ID
+	foundIds = {row[0] for row in idsDbCur.execute("SELECT id from wiki_ids")}
+	idsDbCon.close()
+	return foundIds
+dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2"
+indexDb = "dumpIndex.db"
+imgDb = "imgData.db" # The database to create
+idLineRegex = re.compile(r"<id>(.*)</id>")
+imageLineRegex = re.compile(r".*\| *image *= *([^|]*)")
+bracketImageRegex = re.compile(r"\[\[(File:[^|]*).*]]")
+imageNameRegex = re.compile(r".*\.(jpg|jpeg|png|gif|tiff|tif)", flags=re.IGNORECASE)
+cssImgCropRegex = re.compile(r"{{css image crop\|image *= *(.*)", flags=re.IGNORECASE)
+# In testing, got about 360k image names
+
+print("Getting input page-ids")
+pageIds = getInputPageIds()
+print(f"Found {len(pageIds)}")
+
+print("Opening databases")
+indexDbCon = sqlite3.connect(indexDb)
+indexDbCur = indexDbCon.cursor()
+imgDbCon = sqlite3.connect(imgDb)
+imgDbCur = imgDbCon.cursor()
+print("Checking tables")
+if imgDbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='page_imgs'").fetchone() == None:
+	# Create tables if not present
+	imgDbCur.execute("CREATE TABLE page_imgs (page_id INT PRIMARY KEY, img_name TEXT)") # img_name may be NULL
+	imgDbCur.execute("CREATE INDEX page_imgs_idx ON page_imgs(img_name)")
+else:
+	# Check for already-processed page IDs
+	numSkipped = 0
+	for (pid,) in imgDbCur.execute("SELECT page_id FROM page_imgs"):
+		if pid in pageIds:
+			pageIds.remove(pid)
+			numSkipped += 1
+		else:
+			print(f"WARNING: Found already-processed page ID {pid} which was not in input set")
+	print(f"Will skip {numSkipped} already-processed page IDs")
+
+print("Getting dump-file offsets")
+offsetToPageids = {}
+offsetToEnd = {} # Maps chunk-start offsets to their chunk-end offsets
+iterNum = 0
+for pageId in pageIds:
+	iterNum += 1
+	if iterNum % 1e4 == 0:
+		print(f"At iteration {iterNum}")
+	#
+	query = "SELECT offset, next_offset FROM offsets WHERE id = ?"
+	row = indexDbCur.execute(query, (pageId,)).fetchone()
+	if row == None:
+		print(f"WARNING: Page ID {pageId} not found")
+		continue
+	(chunkOffset, endOffset) = row
+	offsetToEnd[chunkOffset] = endOffset
+	if chunkOffset not in offsetToPageids:
+		offsetToPageids[chunkOffset] = []
+	offsetToPageids[chunkOffset].append(pageId)
+print(f"Found {len(offsetToEnd)} chunks to check")
+
+print("Iterating through chunks in dump file")
+def getImageName(content):
+	" Given an array of text-content lines, tries to return an infobox image name, or None "
+	# Doesn't try and find images in outside-infobox [[File:...]] and <imagemap> sections
+	for line in content:
+		match = imageLineRegex.match(line)
+		if match != None: # Only the first line matching imageLineRegex is considered (every path below returns)
+			imageName = match.group(1).strip()
+			if imageName == "":
+				return None
+			imageName = html.unescape(imageName)
+			# Account for {{... (only a 'css image crop' template is recognised)
+			if imageName.startswith("{"):
+				match = cssImgCropRegex.match(imageName)
+				if match == None:
+					return None
+				imageName = match.group(1)
+			# Account for [[File:...|...]]
+			if imageName.startswith("["):
+				match = bracketImageRegex.match(imageName)
+				if match == None:
+					return None
+				imageName = match.group(1)
+			# Account for <!-- (names containing an html comment are given up on)
+			if imageName.find("<!--") != -1:
+				return None
+			# Remove an initial 'File:'
+			if imageName.startswith("File:"):
+				imageName = imageName[5:]
+			# Remove an initial 'Image:'
+			if imageName.startswith("Image:"):
+				imageName = imageName[6:]
+			# Check for extension (keeps the text up to the last recognised image extension)
+			match = imageNameRegex.match(imageName)
+			if match != None:
+				imageName = match.group(0)
+				imageName = urllib.parse.unquote(imageName)
+				imageName = html.unescape(imageName) # Intentionally unescaping again (handles some odd cases)
+				imageName = imageName.replace("_", " ")
+				return imageName
+			# Exclude lines like: | image = &lt;imagemap&gt;
+			return None
+	return None
+with open(dumpFile, mode='rb') as file:
+	iterNum = 0
+	for (pageOffset, endOffset) in offsetToEnd.items():
+		iterNum += 1
+		if iterNum % 100 == 0:
+			print(f"At iteration {iterNum}")
+		#
+		pageIds = offsetToPageids[pageOffset]
+		# Jump to chunk
+		file.seek(pageOffset)
+		compressedData = file.read(None if endOffset == -1 else endOffset - pageOffset)
+		data = bz2.BZ2Decompressor().decompress(compressedData).decode()
+		# Look in chunk for pages
+		lines = data.splitlines()
+		lineIdx = 0
+		while lineIdx < len(lines):
+			# Look for <page>
+			if lines[lineIdx].lstrip() != "<page>":
+				lineIdx += 1
+				continue
+			# Check page id
+			lineIdx += 3
+			idLine = lines[lineIdx].lstrip()
+			match = idLineRegex.fullmatch(idLine)
+			if match == None or int(match.group(1)) not in pageIds:
+				lineIdx += 1
+				continue
+			pageId = int(match.group(1))
+			lineIdx += 1
+			# Look for <text> in <page>
+			foundText = False
+			while lineIdx < len(lines):
+				if not lines[lineIdx].lstrip().startswith("<text "):
+					lineIdx += 1
+					continue
+				foundText = True
+				# Get text content
+				content = []
+				line = lines[lineIdx]
+				content.append(line[line.find(">") + 1:])
+				lineIdx += 1
+				foundTextEnd = False
+				while lineIdx < len(lines):
+					line = lines[lineIdx]
+					if not line.endswith("</text>"):
+						content.append(line)
+						lineIdx += 1
+						continue
+					foundTextEnd = True
+					content.append(line[:line.rfind("</text>")])
+					# Look for image-filename
+					imageName = getImageName(content)
+					imgDbCur.execute("INSERT into page_imgs VALUES (?, ?)", (pageId, imageName))
+					break
+				if not foundTextEnd:
+					print(f"WARNING: Did not find </text> for page id {pageId}")
+				break
+			if not foundText:
+				print(f"WARNING: Did not find <text> for page id {pageId}")
+
+print("Closing databases")
+indexDbCon.close()
+imgDbCon.commit()
+imgDbCon.close()
diff --git a/backend/tolData/enwiki/lookupPage.py b/backend/tolData/enwiki/lookupPage.py
new file mode 100755
index 0000000..1a90851
--- /dev/null
+++ b/backend/tolData/enwiki/lookupPage.py
@@ -0,0 +1,68 @@
+#!/usr/bin/python3
+
+import sys, re
+import bz2
+import sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]} title1
+
+Looks up a page with title title1 in the wiki dump, using
+the dump-index db, and prints the corresponding <page>.
+"""
+if len(sys.argv) != 2:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2"
+indexDb = "dumpIndex.db"
+pageTitle = sys.argv[1].replace("_", " ")
+
+print("Looking up offset in index db")
+dbCon = sqlite3.connect(indexDb)
+dbCur = dbCon.cursor()
+query = "SELECT title, offset, next_offset FROM offsets WHERE title = ?"
+row = dbCur.execute(query, (pageTitle,)).fetchone()
+if row == None:
+	print("Title not found")
+	sys.exit(0)
+_, pageOffset, endOffset = row
+dbCon.close()
+print(f"Found chunk at offset {pageOffset}")
+
+print("Reading from wiki dump")
+content = []
+with open(dumpFile, mode='rb') as file:
+	# Get uncompressed chunk
+	file.seek(pageOffset)
+	compressedData = file.read(None if endOffset == -1 else endOffset - pageOffset)
+	data = bz2.BZ2Decompressor().decompress(compressedData).decode()
+	# Look in chunk for page
+	lines = data.splitlines()
+	(lineIdx, pageNum, found) = (0, 0, False)
+	while not found and lineIdx + 1 < len(lines): # Bounds-checked, so a chunk lacking the title can't cause an IndexError
+		line = lines[lineIdx]
+		if line.lstrip() == "<page>":
+			pageNum += 1
+			if pageNum > 100:
+				print("ERROR: Did not find title after 100 pages")
+				break
+			lineIdx += 1
+			titleLine = lines[lineIdx]
+			if titleLine.lstrip() == '<title>' + pageTitle + '</title>':
+				found = True
+				print(f"Found title in chunk as page {pageNum}")
+				content.append(line)
+				content.append(titleLine)
+				while lineIdx + 1 < len(lines): # Copy lines up to </page>, or the end of the chunk if it's missing
+					lineIdx += 1
+					line = lines[lineIdx]
+					content.append(line)
+					if line.lstrip() == "</page>":
+						break
+		lineIdx += 1
+	if not found and pageNum <= 100: # Ran out of lines (the over-100-pages case was reported above)
+		print("ERROR: Did not find title in chunk")
+
+print("Content: ")
+print("\n".join(content))
diff --git a/backend/tolData/eol/README.md b/backend/tolData/eol/README.md
new file mode 100644
index 0000000..8c527a8
--- /dev/null
+++ b/backend/tolData/eol/README.md
@@ -0,0 +1,26 @@
+This directory holds files obtained from/using the [Encyclopedia of Life](https://eol.org/).
+
+# Name Data Files
+-   vernacularNames.csv <br>
+    Obtained from <https://opendata.eol.org/dataset/vernacular-names> on 24/04/2022 (last updated on 27/10/2020).
+    Contains alternative-name data from EOL.
+
+# Image Metadata Files
+-   imagesList.tgz <br>
+    Obtained from <https://opendata.eol.org/dataset/images-list> on 24/04/2022 (last updated on 05/02/2020).
+    Contains metadata for images from EOL.
+-   imagesList/ <br>
+    Extracted from imagesList.tgz.
+-   genImagesListDb.sh <br>
+    Creates a database, and imports imagesList/*.csv files into it.
+-   imagesList.db <br>
+    Created by running genImagesListDb.sh <br>
+    Tables: <br>
+    -   `images`:
+        `content_id INT PRIMARY KEY, page_id INT, source_url TEXT, copy_url TEXT, license TEXT, copyright_owner TEXT`
+
+# Image Generation Files
+-   downloadImgs.py <br>
+    Used to download image files into imgsForReview/.
+-   reviewImgs.py <br>
+    Used to review images in imgsForReview/, moving acceptable ones into imgs/.
diff --git a/backend/tolData/eol/downloadImgs.py b/backend/tolData/eol/downloadImgs.py
new file mode 100755
index 0000000..96bc085
--- /dev/null
+++ b/backend/tolData/eol/downloadImgs.py
@@ -0,0 +1,147 @@
+#!/usr/bin/python3
+
+import sys, re, os, random
+import sqlite3
+import urllib.parse, requests
+import time
+from threading import Thread
+import signal
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+For some set of EOL IDs, downloads associated images from URLs in
+an image-list database. Uses multiple downloading threads.
+
+May obtain multiple images per ID. The images will get names
+with the form 'eolId1 contentId1.ext1'.
+
+SIGINT causes the program to finish ongoing downloads and exit.
+The program can be re-run to continue downloading. It looks for
+already-downloaded files, and continues after the one with
+highest EOL ID.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+# In testing, this downloaded about 70k images, over a few days
+
+imagesListDb = "imagesList.db"
+def getInputEolIds():
+	" Returns the set of EOL IDs held in the eol_ids table of ../data.db "
+	idsDbCon = sqlite3.connect("../data.db")
+	idsDbCur = idsDbCon.cursor()
+	# Each row is a 1-tuple holding an ID
+	foundIds = {row[0] for row in idsDbCur.execute("SELECT id FROM eol_ids")}
+	idsDbCon.close()
+	return foundIds
+outDir = "imgsForReview/"
+MAX_IMGS_PER_ID = 3
+MAX_THREADS = 5
+POST_DL_DELAY_MIN = 2 # Minimum delay in seconds to pause after download before starting another (for each thread)
+POST_DL_DELAY_MAX = 3
+LICENSE_REGEX = r"cc-by((-nc)?(-sa)?(-[234]\.[05])?)|cc-publicdomain|cc-0-1\.0|public domain"
+
+print("Getting input EOL IDs")
+eolIds = getInputEolIds()
+print("Getting EOL IDs to download for")
+# Get IDs from images-list db
+imgDbCon = sqlite3.connect(imagesListDb)
+imgCur = imgDbCon.cursor()
+imgListIds = set()
+for (pageId,) in imgCur.execute("SELECT DISTINCT page_id FROM images"):
+	imgListIds.add(pageId)
+# Get set intersection, and sort into list
+eolIds = eolIds.intersection(imgListIds)
+eolIds = sorted(eolIds)
+print(f"Result: {len(eolIds)} EOL IDs")
+
+print("Checking output directory")
+if not os.path.exists(outDir):
+	os.mkdir(outDir)
+print("Finding next ID to download for")
+nextIdx = 0
+fileList = os.listdir(outDir)
+ids = [int(filename.split(" ")[0]) for filename in fileList]
+if len(ids) > 0:
+	lastId = max(ids) # May be absent from eolIds if the input set changed, so list.index() isn't used
+	nextIdx = sum(1 for eolId in eolIds if eolId <= lastId) # eolIds is sorted, so this is the index after lastId
+if nextIdx == len(eolIds):
+	print("No IDs left. Exiting...")
+	sys.exit(0)
+
+print("Starting download threads")
+threads = [] # Started download threads that may still be running (only touched by the main thread)
+threadException = None # Used for ending main thread after a non-main thread exception
+# Handle SIGINT signals
+interrupted = False
+oldHandler = None
+def onSigint(sig, frame):
+	global interrupted
+	interrupted = True
+	signal.signal(signal.SIGINT, oldHandler)
+oldHandler = signal.signal(signal.SIGINT, onSigint)
+# Function for threads to execute (downloads url to outFile, then pauses before the thread ends)
+def downloadImg(url, outFile):
+	global threadException
+	try:
+		data = requests.get(url)
+		with open(outFile, 'wb') as file:
+			file.write(data.content)
+		time.sleep(random.random() * (POST_DL_DELAY_MAX - POST_DL_DELAY_MIN) + POST_DL_DELAY_MIN)
+	except Exception as e:
+		print(f"Error while downloading to {outFile}: {str(e)}", file=sys.stderr)
+		threadException = e
+# Manage downloading
+for idx in range(nextIdx, len(eolIds)):
+	eolId = eolIds[idx]
+	# Get image urls
+	ownerSet = set() # Used to get images from different owners, for variety
+	exitLoop = False
+	query = "SELECT content_id, copy_url, license, copyright_owner FROM images WHERE page_id = ?"
+	for (contentId, url, license, copyrightOwner) in imgCur.execute(query, (eolId,)):
+		if url.startswith("data/"):
+			url = "https://content.eol.org/" + url
+		urlParts = urllib.parse.urlparse(url)
+		extension = os.path.splitext(urlParts.path)[1]
+		if len(extension) <= 1:
+			print(f"WARNING: No filename extension found in URL {url}", file=sys.stderr)
+			continue
+		# Check image-quantity limit
+		if len(ownerSet) == MAX_IMGS_PER_ID:
+			break
+		# Check for skip conditions
+		if re.fullmatch(LICENSE_REGEX, license) == None:
+			continue
+		if len(copyrightOwner) > 100: # Avoid certain copyrightOwner fields that seem long and problematic
+			continue
+		if copyrightOwner in ownerSet:
+			continue
+		ownerSet.add(copyrightOwner)
+		# Determine output filename
+		outPath = f"{outDir}{eolId} {contentId}{extension}"
+		if os.path.exists(outPath):
+			print(f"WARNING: {outPath} already exists. Skipping download.")
+			continue
+		# Check thread limit (counts live threads, instead of using a counter modified by multiple threads)
+		while len([t for t in threads if t.is_alive()]) == MAX_THREADS:
+			time.sleep(1)
+		threads = [t for t in threads if t.is_alive()]
+		# Stop starting downloads after an interrupt or thread-exception
+		if interrupted or threadException != None:
+			exitLoop = True
+			break
+		# Perform download
+		print(f"Downloading image to {outPath}")
+		thread = Thread(target=downloadImg, args=(url, outPath), daemon=True)
+		threads.append(thread)
+		thread.start()
+	if exitLoop:
+		break
+# Wait for ongoing downloads to end (daemon threads are stopped abruptly when the main thread exits)
+print("Waiting for existing threads to end")
+for thread in threads:
+	thread.join()
+# Close images-list db
+print("Finished downloading")
+imgDbCon.close()
diff --git a/backend/tolData/eol/genImagesListDb.sh b/backend/tolData/eol/genImagesListDb.sh
new file mode 100755
index 0000000..87dd840
--- /dev/null
+++ b/backend/tolData/eol/genImagesListDb.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+set -e
+
+# Combine CSV files into one, skipping the header line of each file (FNR is awk's per-file line number)
+awk 'FNR > 1' imagesList/media_*_{1..58}.csv > imagesList.csv
+# Create database, and import the CSV file
+sqlite3 imagesList.db <<END
+CREATE TABLE images (
+	content_id INT PRIMARY KEY, page_id INT, source_url TEXT, copy_url TEXT, license TEXT, copyright_owner TEXT);
+.mode csv
+.import 'imagesList.csv' images
+END
diff --git a/backend/tolData/eol/reviewImgs.py b/backend/tolData/eol/reviewImgs.py
new file mode 100755
index 0000000..ecdf7ab
--- /dev/null
+++ b/backend/tolData/eol/reviewImgs.py
@@ -0,0 +1,205 @@
+#!/usr/bin/python3
+
+import sys, re, os, time
+import sqlite3
+import tkinter as tki
+from tkinter import ttk
+import PIL
+from PIL import ImageTk, Image, ImageOps
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Provides a GUI for reviewing images. Looks in a for-review directory for
+images named 'eolId1 contentId1.ext1', and, for each EOL ID, enables the user to
+choose an image to keep, or reject all. Also provides image rotation.
+Chosen images are placed in another directory, and rejected ones are deleted.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+imgDir = "imgsForReview/"
+outDir = "imgs/"
+extraInfoDbCon = sqlite3.connect("../data.db")
+extraInfoDbCur = extraInfoDbCon.cursor()
+def getExtraInfo(eolId):
+	" Returns a 'Reviewing EOL ID ...' string, with an 'aka' alt-name appended if the query finds one "
+	query = "SELECT names.alt_name FROM" \
+		" names INNER JOIN eol_ids ON eol_ids.name = names.name" \
+		" WHERE id = ? and pref_alt = 1"
+	row = extraInfoDbCur.execute(query, (eolId,)).fetchone()
+	info = f"Reviewing EOL ID {eolId}"
+	if row != None:
+		info += f", aka \"{row[0]}\""
+	return info
+IMG_DISPLAY_SZ = 400
+MAX_IMGS_PER_ID = 3
+IMG_BG_COLOR = (88, 28, 135)
+PLACEHOLDER_IMG = Image.new("RGB", (IMG_DISPLAY_SZ, IMG_DISPLAY_SZ), IMG_BG_COLOR)
+
+print("Checking output directory")
+if not os.path.exists(outDir):
+	os.mkdir(outDir)
+print("Getting input image list")
+imgList = os.listdir(imgDir)
+imgList.sort(key=lambda s: int(s.split(" ")[0]))
+if len(imgList) == 0:
+	print("No input images found")
+	sys.exit(0)
+
+class EolImgReviewer:
+	" Provides the GUI for reviewing images "
+	def __init__(self, root, imgList):
+		self.root = root
+		root.title("EOL Image Reviewer")
+		# Setup main frame
+		mainFrame = ttk.Frame(root, padding="5 5 5 5")
+		mainFrame.grid(column=0, row=0, sticky=(tki.N, tki.W, tki.E, tki.S))
+		root.columnconfigure(0, weight=1)
+		root.rowconfigure(0, weight=1)
+		# Set up images-to-be-reviewed frames
+		self.imgs = [PLACEHOLDER_IMG] * MAX_IMGS_PER_ID # Stored as fields for use in rotation
+		self.photoImgs = list(map(lambda img: ImageTk.PhotoImage(img), self.imgs)) # Image objects usable by tkinter
+			# These need a persistent reference for some reason (doesn't display otherwise)
+		self.labels = []
+		for i in range(MAX_IMGS_PER_ID):
+			frame = ttk.Frame(mainFrame, width=IMG_DISPLAY_SZ, height=IMG_DISPLAY_SZ)
+			frame.grid(column=i, row=0)
+			label = ttk.Label(frame, image=self.photoImgs[i])
+			label.grid(column=0, row=0)
+			self.labels.append(label)
+		# Add padding
+		for child in mainFrame.winfo_children():
+			child.grid_configure(padx=5, pady=5)
+		# Add keyboard bindings
+		root.bind("<q>", self.quit)
+		root.bind("<Key-j>", lambda evt: self.accept(0))
+		root.bind("<Key-k>", lambda evt: self.accept(1))
+		root.bind("<Key-l>", lambda evt: self.accept(2))
+		root.bind("<Key-i>", lambda evt: self.reject())
+		root.bind("<Key-a>", lambda evt: self.rotate(0))
+		root.bind("<Key-s>", lambda evt: self.rotate(1))
+		root.bind("<Key-d>", lambda evt: self.rotate(2))
+		root.bind("<Key-A>", lambda evt: self.rotate(0, True))
+		root.bind("<Key-S>", lambda evt: self.rotate(1, True))
+		root.bind("<Key-D>", lambda evt: self.rotate(2, True))
+		# For displaying extra info (set before getNextImgs(), which may call quit(), which reads these)
+		self.numReviewed = 0
+		self.startTime = time.time()
+		# Initialise images to review
+		self.imgList = imgList
+		self.imgListIdx = 0
+		self.nextEolId = 0
+		self.nextImgNames = []
+		self.rotations = []
+		self.getNextImgs()
+	def getNextImgs(self):
+		" Updates display with new images to review, or ends program "
+		# Gather names of next images to review
+		for i in range(MAX_IMGS_PER_ID):
+			if self.imgListIdx == len(self.imgList):
+				if i == 0:
+					self.quit()
+					return
+				break
+			imgName = self.imgList[self.imgListIdx]
+			eolId = int(re.match(r"(\d+) (\d+)", imgName).group(1))
+			if i == 0:
+				self.nextEolId = eolId
+				self.nextImgNames = [imgName]
+				self.rotations = [0]
+			else:
+				if self.nextEolId != eolId:
+					break
+				self.nextImgNames.append(imgName)
+				self.rotations.append(0)
+			self.imgListIdx += 1
+		# Update displayed images
+		idx = 0
+		while idx < MAX_IMGS_PER_ID:
+			if idx < len(self.nextImgNames):
+				try:
+					img = Image.open(imgDir + self.nextImgNames[idx])
+					img = ImageOps.exif_transpose(img)
+				except PIL.UnidentifiedImageError:
+					os.remove(imgDir + self.nextImgNames[idx])
+					del self.nextImgNames[idx]
+					del self.rotations[idx]
+					continue
+				self.imgs[idx] = self.resizeImgForDisplay(img)
+			else:
+				self.imgs[idx] = PLACEHOLDER_IMG
+			self.photoImgs[idx] = ImageTk.PhotoImage(self.imgs[idx])
+			self.labels[idx].config(image=self.photoImgs[idx])
+			idx += 1
+		# Restart if all image files non-recognisable
+		if len(self.nextImgNames) == 0:
+			self.getNextImgs()
+			return
+		# Update title
+		firstImgIdx = self.imgListIdx - len(self.nextImgNames) + 1
+		lastImgIdx = self.imgListIdx
+		title = getExtraInfo(self.nextEolId)
+		title += f" (imgs {firstImgIdx} to {lastImgIdx} out of {len(self.imgList)})"
+		self.root.title(title)
+	def accept(self, imgIdx):
+		" React to a user selecting an image "
+		if imgIdx >= len(self.nextImgNames):
+			print("Invalid selection")
+			return
+		for i in range(len(self.nextImgNames)):
+			inFile = imgDir + self.nextImgNames[i]
+			if i == imgIdx: # Save accepted image into the output directory, rotating if needed
+				outFile = outDir + self.nextImgNames[i]
+				img = Image.open(inFile)
+				img = ImageOps.exif_transpose(img)
+				if self.rotations[i] != 0:
+					img = img.rotate(self.rotations[i], expand=True)
+				img.save(outFile)
+			os.remove(inFile) # The input file is removed whether or not it was the accepted one
+		self.numReviewed += 1
+		self.getNextImgs()
+	def reject(self):
+		" React to a user rejecting all images of a set "
+		for i in range(len(self.nextImgNames)):
+			os.remove(imgDir + self.nextImgNames[i])
+		self.numReviewed += 1
+		self.getNextImgs()
+	def rotate(self, imgIdx, anticlockwise = False):
+		" Respond to a user rotating an image "
+		if imgIdx >= len(self.nextImgNames): # No image in this slot (only a placeholder), so nothing to rotate
+			return
+		deg = -90 if not anticlockwise else 90
+		self.imgs[imgIdx] = self.imgs[imgIdx].rotate(deg)
+		self.photoImgs[imgIdx] = ImageTk.PhotoImage(self.imgs[imgIdx])
+		self.labels[imgIdx].config(image=self.photoImgs[imgIdx])
+		self.rotations[imgIdx] = (self.rotations[imgIdx] + deg) % 360
+	def quit(self, e = None):
+		global extraInfoDbCon
+		print(f"Number reviewed: {self.numReviewed}")
+		timeElapsed = time.time() - self.startTime
+		print(f"Time elapsed: {timeElapsed:.2f} seconds")
+		if self.numReviewed > 0:
+			print(f"Avg time per review: {timeElapsed/self.numReviewed:.2f} seconds")
+		extraInfoDbCon.close()
+		self.root.destroy()
+	def resizeImgForDisplay(self, img):
+		" Returns a copy of an image, shrunk to fit in its frame (keeps aspect ratio), and with a background "
+		if max(img.width, img.height) > IMG_DISPLAY_SZ:
+			if (img.width > img.height):
+				newHeight = int(img.height * IMG_DISPLAY_SZ/img.width)
+				img = img.resize((IMG_DISPLAY_SZ, newHeight))
+			else:
+				newWidth = int(img.width * IMG_DISPLAY_SZ / img.height)
+				img = img.resize((newWidth, IMG_DISPLAY_SZ))
+		bgImg = PLACEHOLDER_IMG.copy()
+		bgImg.paste(img, box=(
+			int((IMG_DISPLAY_SZ - img.width) / 2),
+			int((IMG_DISPLAY_SZ - img.height) / 2)))
+		return bgImg
+# Create GUI and defer control
+print("Starting GUI")
+root = tki.Tk()
+EolImgReviewer(root, imgList)
+root.mainloop()
diff --git a/backend/tolData/genDbpData.py b/backend/tolData/genDbpData.py
new file mode 100755
index 0000000..df3a6be
--- /dev/null
+++ b/backend/tolData/genDbpData.py
@@ -0,0 +1,247 @@
+#!/usr/bin/python3
+
+import sys, os, re
+import sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads a database containing data from DBpedia, and tries to associate
+DBpedia IRIs with nodes in a database, adding short-descriptions for them.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+dbpediaDb = "dbpedia/descData.db"
+namesToSkipFile = "pickedEnwikiNamesToSkip.txt"
+pickedLabelsFile = "pickedDbpLabels.txt"
+dbFile = "data.db"
+rootNodeName = "cellular organisms"
+rootLabel = "organism" # Will be associated with root node
+# Got about 400k descriptions when testing
+
+print("Opening databases")
+dbpCon = sqlite3.connect(dbpediaDb)
+dbpCur = dbpCon.cursor()
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+
+print("Getting node names")
+nodeNames = set()
+for (name,) in dbCur.execute("SELECT name from nodes"):
+	nodeNames.add(name)
+
+print("Checking for names to skip")
+oldSz = len(nodeNames)
+if os.path.exists(namesToSkipFile): # The skip-list file is optional
+	with open(namesToSkipFile) as file:
+		for line in file: # One node name per line
+			nodeNames.discard(line.rstrip()) # discard(), not remove(): a listed name that isn't a node is ignored instead of raising KeyError
+print(f"Skipping {oldSz - len(nodeNames)} nodes")
+
+print("Reading disambiguation-page labels")
+disambigLabels = set() # Labels (not IRIs) of disambiguation pages; compared against labels in the association loop
+query = "SELECT labels.label from labels INNER JOIN disambiguations ON labels.iri = disambiguations.iri"
+for (label,) in dbpCur.execute(query):
+	disambigLabels.add(label)
+
+print("Trying to associate nodes with DBpedia labels")
+nodeToLabel = {}
+nameVariantRegex = re.compile(r"(.*) \(([^)]+)\)") # Used to recognise labels like 'Thor (shrimp)'
+nameToVariants = {} # Maps node names to lists of matching labels
+iterNum = 0
+for (label,) in dbpCur.execute("SELECT label from labels"):
+	iterNum += 1
+	if iterNum % 1e5 == 0:
+		print(f"At iteration {iterNum}")
+	# Skip labels of disambiguation pages
+	if label in disambigLabels:
+		continue
+	name = label.lower() # Node names are matched against the lowercased label
+	if name in nodeNames:
+		if name not in nameToVariants:
+			nameToVariants[name] = [label]
+		elif label not in nameToVariants[name]:
+			nameToVariants[name].append(label)
+	else:
+		match = nameVariantRegex.fullmatch(name) # Try the 'name (category)' form
+		if match != None:
+			subName = match.group(1)
+			if subName in nodeNames and match.group(2) != "disambiguation":
+				if subName not in nameToVariants:
+					nameToVariants[subName] = [label]
+				elif label not in nameToVariants[subName]: # Lists hold original-case labels, so dedupe on 'label', not 'name'
+					nameToVariants[subName].append(label)
+# Associate labels without conflicts
+for (name, variants) in nameToVariants.items():
+	if len(variants) == 1:
+		nodeToLabel[name] = variants[0]
+for name in nodeToLabel:
+	del nameToVariants[name]
+# Special case for root node: always gets rootLabel, and is dropped from the conflict set
+nodeToLabel[rootNodeName] = rootLabel
+if rootNodeName in nameToVariants:
+	del nameToVariants[rootNodeName]
+
+print(f"Trying to resolve {len(nameToVariants)} conflicts")
+def resolveWithPickedLabels():
+	" Attempts to resolve conflicts using a picked-names file, holding lines like 'name|label' (empty label: use none) "
+	if not os.path.exists(pickedLabelsFile): # Treat the file as optional, like namesToSkipFile
+		return
+	with open(pickedLabelsFile) as file:
+		for line in file:
+			(name, _, label) = line.rstrip().partition("|")
+			if name not in nameToVariants:
+				print(f"WARNING: No conflict found for name \"{name}\"", file=sys.stderr)
+				continue
+			variants = nameToVariants.pop(name) # The conflict counts as resolved, whether or not a label was picked
+			if label != "":
+				if label not in variants:
+					print(f"INFO: Picked label \"{label}\" for name \"{name}\" outside choice set", file=sys.stderr)
+				nodeToLabel[name] = label
+def resolveWithCategoryList():
+	"""
+	Attempts to resolve conflicts by looking for labels like 'name1 (category1)',
+	and choosing those with a category1 that seems 'biological'.
+	Does two passes, using more generic categories first. This helps avoid stuff like
+	Pan being classified as a horse instead of an ape.
+	"""
+	generalCategories = {
+		"species", "genus",
+		"plant", "fungus", "animal",
+		"annelid", "mollusc", "arthropod", "crustacean", "insect", "bug",
+		"fish", "amphibian", "reptile", "bird", "mammal",
+	}
+	specificCategories = {
+		"protist", "alveolate", "dinoflagellates",
+		"orchid", "poaceae", "fern", "moss", "alga",
+		"bryozoan", "hydrozoan",
+		"sponge", "cnidarian", "coral", "polychaete", "echinoderm",
+		"bivalve", "gastropod", "chiton",
+		"shrimp", "decapod", "crab", "barnacle", "copepod",
+		"arachnid", "spider", "harvestman", "mite",
+		"dragonfly", "mantis", "cicada", "grasshopper", "planthopper",
+			"beetle", "fly", "butterfly", "moth", "wasp",
+		"catfish",
+		"frog",
+		"lizard",
+		"horse", "sheep", "cattle", "mouse",
+	}
+	namesToRemove = set()
+	for (name, variants) in nameToVariants.items():
+		found = False
+		for label in variants:
+			match = nameVariantRegex.match(label)
+			if match != None and match.group(2) in generalCategories:
+				nodeToLabel[name] = label
+				namesToRemove.add(name)
+				found = True
+				break
+		if not found:
+			for label in variants:
+				match = nameVariantRegex.match(label)
+				if match != None and match.group(2) in specificCategories:
+					nodeToLabel[name] = label
+					namesToRemove.add(name)
+					break
+	for name in namesToRemove:
+		del nameToVariants[name]
+def resolveWithTypeData():
+	" Attempts to resolve conflicts using DBpedia's type data "
+	taxonTypes = { # Obtained from the DBpedia ontology
+		"http://dbpedia.org/ontology/Species",
+		"http://dbpedia.org/ontology/Archaea",
+		"http://dbpedia.org/ontology/Bacteria",
+		"http://dbpedia.org/ontology/Eukaryote",
+		"http://dbpedia.org/ontology/Plant",
+		"http://dbpedia.org/ontology/ClubMoss",
+		"http://dbpedia.org/ontology/Conifer",
+		"http://dbpedia.org/ontology/CultivatedVariety",
+		"http://dbpedia.org/ontology/Cycad",
+		"http://dbpedia.org/ontology/Fern",
+		"http://dbpedia.org/ontology/FloweringPlant",
+		"http://dbpedia.org/ontology/Grape",
+		"http://dbpedia.org/ontology/Ginkgo",
+		"http://dbpedia.org/ontology/Gnetophytes",
+		"http://dbpedia.org/ontology/GreenAlga",
+		"http://dbpedia.org/ontology/Moss",
+		"http://dbpedia.org/ontology/Fungus",
+		"http://dbpedia.org/ontology/Animal",
+		"http://dbpedia.org/ontology/Fish",
+		"http://dbpedia.org/ontology/Crustacean",
+		"http://dbpedia.org/ontology/Mollusca",
+		"http://dbpedia.org/ontology/Insect",
+		"http://dbpedia.org/ontology/Arachnid",
+		"http://dbpedia.org/ontology/Amphibian",
+		"http://dbpedia.org/ontology/Reptile",
+		"http://dbpedia.org/ontology/Bird",
+		"http://dbpedia.org/ontology/Mammal",
+		"http://dbpedia.org/ontology/Cat",
+		"http://dbpedia.org/ontology/Dog",
+		"http://dbpedia.org/ontology/Horse",
+	}
+	iterNum = 0
+	for (label, type) in dbpCur.execute("SELECT label, type from labels INNER JOIN types on labels.iri = types.iri"):
+		iterNum += 1
+		if iterNum % 1e5 == 0:
+			print(f"At iteration {iterNum}")
+		#
+		if type in taxonTypes:
+			name = label.lower()
+			if name in nameToVariants:
+				nodeToLabel[name] = label
+				del nameToVariants[name]
+			else:
+				match = nameVariantRegex.fullmatch(name)
+				if match != None:
+					name = match.group(1)
+					if name in nameToVariants:
+						nodeToLabel[name] = label
+						del nameToVariants[name]
+#resolveWithTypeData()
+#resolveWithCategoryList()
+resolveWithPickedLabels()
+print(f"Remaining number of conflicts: {len(nameToVariants)}")
+
+print("Getting node IRIs")
+nodeToIri = {}
+for (name, label) in nodeToLabel.items():
+	(iri,) = dbpCur.execute("SELECT iri FROM labels where label = ? COLLATE NOCASE", (label,)).fetchone()
+	nodeToIri[name] = iri
+
+print("Resolving redirects")
+redirectingIriSet = set()
+iterNum = 0
+for (name, iri) in nodeToIri.items():
+	iterNum += 1
+	if iterNum % 1e4 == 0:
+		print(f"At iteration {iterNum}")
+	#
+	row = dbpCur.execute("SELECT target FROM redirects where iri = ?", (iri,)).fetchone()
+	if row != None:
+		nodeToIri[name] = row[0]
+		redirectingIriSet.add(name)
+
+print("Adding description tables")
+dbCur.execute("CREATE TABLE wiki_ids (name TEXT PRIMARY KEY, id INT, redirected INT)")
+dbCur.execute("CREATE INDEX wiki_id_idx ON wiki_ids(id)")
+dbCur.execute("CREATE TABLE descs (wiki_id INT PRIMARY KEY, desc TEXT, from_dbp INT)")
+iterNum = 0
+for (name, iri) in nodeToIri.items():
+	iterNum += 1
+	if iterNum % 1e4 == 0:
+		print(f"At iteration {iterNum}")
+	#
+	query = "SELECT abstract, id FROM abstracts INNER JOIN ids ON abstracts.iri = ids.iri WHERE ids.iri = ?"
+	row = dbpCur.execute(query, (iri,)).fetchone()
+	if row != None:
+		desc, wikiId = row
+		dbCur.execute("INSERT INTO wiki_ids VALUES (?, ?, ?)", (name, wikiId, 1 if name in redirectingIriSet else 0))
+		dbCur.execute("INSERT OR IGNORE INTO descs VALUES (?, ?, ?)", (wikiId, desc, 1))
+
+print("Closing databases")
+dbCon.commit()
+dbCon.close()
+dbpCon.commit()
+dbpCon.close()
diff --git a/backend/tolData/genEnwikiDescData.py b/backend/tolData/genEnwikiDescData.py
new file mode 100755
index 0000000..d3f93ed
--- /dev/null
+++ b/backend/tolData/genEnwikiDescData.py
@@ -0,0 +1,102 @@
+#!/usr/bin/python3
+
+import sys, re, os
+import sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads a database containing data from Wikipedia, and tries to associate
+wiki pages with nodes in the database, and add descriptions for nodes
+that don't have them.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+enwikiDb = "enwiki/descData.db"
+dbFile = "data.db"
+namesToSkipFile = "pickedEnwikiNamesToSkip.txt"
+pickedLabelsFile = "pickedEnwikiLabels.txt"
+# Got about 25k descriptions when testing
+
+print("Opening databases")
+enwikiCon = sqlite3.connect(enwikiDb)
+enwikiCur = enwikiCon.cursor()
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+
+print("Checking for names to skip")
+namesToSkip = set()
+if os.path.exists(namesToSkipFile):
+	with open(namesToSkipFile) as file:
+		for line in file:
+			namesToSkip.add(line.rstrip())
+	print(f"Found {len(namesToSkip)}")
+print("Checking for picked-titles")
+nameToPickedTitle = {}
+if os.path.exists(pickedLabelsFile):
+	with open(pickedLabelsFile) as file:
+		for line in file:
+			(name, _, title) = line.rstrip().partition("|")
+			nameToPickedTitle[name.lower()] = title
+print(f"Found {len(nameToPickedTitle)}")
+
+print("Getting names of nodes without descriptions")
+nodeNames = set()
+query = "SELECT nodes.name FROM nodes LEFT JOIN wiki_ids ON nodes.name = wiki_ids.name WHERE wiki_ids.id IS NULL"
+for (name,) in dbCur.execute(query):
+	nodeNames.add(name)
+print(f"Found {len(nodeNames)}")
+nodeNames.difference_update(namesToSkip)
+
+print("Associating nodes with page IDs")
+nodeToPageId = {}
+iterNum = 0
+for name in nodeNames:
+	iterNum += 1
+	if iterNum % 1e4 == 0:
+		print(f"At iteration {iterNum}")
+	# Without a picked title, look the node name up case-insensitively
+	if name not in nameToPickedTitle:
+		row = enwikiCur.execute("SELECT id FROM pages WHERE pages.title = ? COLLATE NOCASE", (name,)).fetchone()
+		if row != None:
+			nodeToPageId[name] = row[0]
+	else: # A picked title must match a page title exactly
+		title = nameToPickedTitle[name]
+		row = enwikiCur.execute("SELECT id FROM pages WHERE pages.title = ?", (title,)).fetchone()
+		if row != None:
+			nodeToPageId[name] = row[0]
+		else:
+			print(f"WARNING: Picked title {title} not found", file=sys.stderr)
+
+print("Resolving redirects")
+redirectingNames = set()
+iterNum = 0
+for (name, pageId) in nodeToPageId.items():
+	iterNum += 1
+	if iterNum % 1e3 == 0:
+		print(f"At iteration {iterNum}")
+	#
+	query = "SELECT pages.id FROM redirects INNER JOIN pages ON redirects.target = pages.title WHERE redirects.id = ?"
+	row = enwikiCur.execute(query, (pageId,)).fetchone()
+	if row != None:
+		nodeToPageId[name] = row[0]
+		redirectingNames.add(name)
+
+print("Adding description data")
+iterNum = 0
+for (name, pageId) in nodeToPageId.items():
+	iterNum += 1
+	if iterNum % 1e3 == 0:
+		print(f"At iteration {iterNum}")
+	#
+	row = enwikiCur.execute("SELECT desc FROM descs where descs.id = ?", (pageId,)).fetchone()
+	if row != None:
+		dbCur.execute("INSERT INTO wiki_ids VALUES (?, ?, ?)", (name, pageId, 1 if name in redirectingNames else 0))
+		dbCur.execute("INSERT OR IGNORE INTO descs VALUES (?, ?, ?)", (pageId, row[0], 0))
+
+print("Closing databases")
+dbCon.commit()
+dbCon.close()
+enwikiCon.close()
diff --git a/backend/tolData/genEnwikiNameData.py b/backend/tolData/genEnwikiNameData.py
new file mode 100755
index 0000000..7ad61d1
--- /dev/null
+++ b/backend/tolData/genEnwikiNameData.py
@@ -0,0 +1,76 @@
+#!/usr/bin/python3
+
+import sys, re
+import sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads from a database containing data from Wikipdia, along with
+node and wiki-id information from the database, and use wikipedia
+page-redirect information to add additional alt-name data.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+enwikiDb = "enwiki/descData.db"
+dbFile = "data.db"
+altNameRegex = re.compile(r"[a-zA-Z]+")
+	# Avoids names like 'Evolution of Elephants', 'Banana fiber', 'Fish (zoology)',
+
+print("Opening databases")
+enwikiCon = sqlite3.connect(enwikiDb)
+enwikiCur = enwikiCon.cursor()
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+
+print("Getting nodes with wiki IDs")
+nodeToWikiId = {}
+for (nodeName, wikiId) in dbCur.execute("SELECT name, id from wiki_ids"):
+	nodeToWikiId[nodeName] = wikiId
+print(f"Found {len(nodeToWikiId)}")
+
+print("Iterating through nodes, finding names that redirect to them")
+nodeToAltNames = {}
+numAltNames = 0
+iterNum = 0
+for (nodeName, wikiId) in nodeToWikiId.items():
+	iterNum += 1
+	if iterNum % 1e4 == 0:
+		print(f"At iteration {iterNum}")
+	#
+	nodeToAltNames[nodeName] = set()
+	query = "SELECT p1.title FROM pages p1" \
+		" INNER JOIN redirects r1 ON p1.id = r1.id" \
+		" INNER JOIN pages p2 ON r1.target = p2.title WHERE p2.id = ?"
+	for (name,) in enwikiCur.execute(query, (wikiId,)):
+		if altNameRegex.fullmatch(name) != None and name.lower() != nodeName:
+			nodeToAltNames[nodeName].add(name.lower())
+			numAltNames += 1
+print(f"Found {numAltNames} alt-names")
+
+print("Excluding existing alt-names from the set")
+query = "SELECT alt_name FROM names WHERE alt_name IN ({})"
+iterNum = 0
+for (nodeName, altNames) in nodeToAltNames.items():
+	iterNum += 1
+	if iterNum % 1e4 == 0:
+		print(f"At iteration {iterNum}")
+	#
+	existingNames = set()
+	for (name,) in dbCur.execute(query.format(",".join(["?"] * len(altNames))), list(altNames)):
+		existingNames.add(name)
+	numAltNames -= len(existingNames)
+	altNames.difference_update(existingNames)
+print(f"Left with {numAltNames} alt-names")
+
+print("Adding alt-names to database")
+for (nodeName, altNames) in nodeToAltNames.items():
+	for altName in altNames:
+		dbCur.execute("INSERT INTO names VALUES (?, ?, ?, 'enwiki')", (nodeName, altName, 0))
+
+print("Closing databases")
+dbCon.commit()
+dbCon.close()
+enwikiCon.close()
diff --git a/backend/tolData/genEolNameData.py b/backend/tolData/genEolNameData.py
new file mode 100755
index 0000000..dd33ee0
--- /dev/null
+++ b/backend/tolData/genEolNameData.py
@@ -0,0 +1,184 @@
+#!/usr/bin/python3
+
+import sys, re, os
+import html, csv, sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads files describing name data from the 'Encyclopedia of Life' site,
+tries to associate names with nodes in the database, and adds tables
+to represent associated names.
+
+Reads a vernacularNames.csv file:
+	Starts with a header line containing:
+		page_id, canonical_form, vernacular_string, language_code,
+		resource_name, is_preferred_by_resource, is_preferred_by_eol
+	The canonical_form and vernacular_string fields contain names
+		associated with the page ID. Names are not always unique to
+		particular page IDs.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+vnamesFile = "eol/vernacularNames.csv" # Had about 2.8e6 entries
+dbFile = "data.db"
+namesToSkip = {"unknown", "unknown species", "unidentified species"}
+pickedIdsFile = "pickedEolIds.txt"
+altsToSkipFile = "pickedEolAltsToSkip.txt"
+
+print("Reading in vernacular-names data")
+nameToPids = {} # 'pid' means 'Page ID'
+canonicalNameToPids = {}
+pidToNames = {}
+pidToPreferred = {} # Maps pids to 'preferred' names
+def updateMaps(name, pid, canonical, preferredAlt):
+	"""
+	Registers one (name, page ID) pairing in the module-level lookup maps.
+	Names listed in namesToSkip are ignored entirely.
+	If 'canonical' is true, the pairing is also added to canonicalNameToPids.
+	If 'preferredAlt' is true, the name becomes the preferred name for the
+	page ID, replacing any earlier one.
+	"""
+	if name in namesToSkip:
+		return
+	# Name -> page IDs
+	nameToPids.setdefault(name, set()).add(pid)
+	if canonical:
+		canonicalNameToPids.setdefault(name, set()).add(pid)
+	# Page ID -> names
+	pidToNames.setdefault(pid, set()).add(name)
+	# Page ID -> preferred name
+	if preferredAlt:
+		pidToPreferred[pid] = name
+with open(vnamesFile, newline="") as csvfile:
+	reader = csv.reader(csvfile)
+	lineNum = 0
+	for row in reader:
+		lineNum += 1
+		if lineNum % 1e5 == 0:
+			print(f"At line {lineNum}")
+		# Skip header line
+		if lineNum == 1:
+			continue
+		# Parse line
+		pid = int(row[0])
+		name1 = re.sub(r"<[^>]+>", "", row[1].lower()) # Remove tags
+		name2 = html.unescape(row[2]).lower()
+		lang = row[3]
+		preferred = row[6] == "preferred"
+		# Add to maps
+		updateMaps(name1, pid, True, False)
+		if lang == "eng" and name2 != "":
+			updateMaps(name2, pid, False, preferred)
+
+print("Checking for manually-picked pids")
+nameToPickedPid = {}
+if os.path.exists(pickedIdsFile):
+	with open(pickedIdsFile) as file:
+		for line in file:
+			(name, _, eolId) = line.rstrip().partition("|")
+			nameToPickedPid[name] = None if eolId == "" else int(eolId)
+print(f"Found {len(nameToPickedPid)}")
+
+print("Checking for alt-names to skip")
+nameToAltsToSkip = {}
+numToSkip = 0
+if os.path.exists(altsToSkipFile):
+	with open(altsToSkipFile) as file:
+		for line in file:
+			(name, _, altName) = line.rstrip().partition("|")
+			if name not in nameToAltsToSkip:
+				nameToAltsToSkip[name] = [altName]
+			else:
+				nameToAltsToSkip[name].append(altName)
+			numToSkip += 1
+print(f"Found {numToSkip} alt-names to skip")
+
+print("Creating database tables")
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+dbCur.execute("CREATE TABLE names(name TEXT, alt_name TEXT, pref_alt INT, src TEXT, PRIMARY KEY(name, alt_name))")
+dbCur.execute("CREATE INDEX names_idx ON names(name)")
+dbCur.execute("CREATE INDEX names_alt_idx ON names(alt_name)")
+dbCur.execute("CREATE INDEX names_alt_idx_nc ON names(alt_name COLLATE NOCASE)")
+dbCur.execute("CREATE TABLE eol_ids(id INT PRIMARY KEY, name TEXT)")
+dbCur.execute("CREATE INDEX eol_name_idx ON eol_ids(name)")
+
+print("Associating nodes with names")
+usedPids = set()
+unresolvedNodeNames = set()
+dbCur2 = dbCon.cursor()
+def addToDb(nodeName, pidToUse):
+	" Adds page-ID-associated name data to a node in the database "
+	global dbCur, pidToPreferred
+	dbCur.execute("INSERT INTO eol_ids VALUES (?, ?)", (pidToUse, nodeName))
+	# Get alt-names
+	altNames = set()
+	for n in pidToNames[pidToUse]:
+		# Avoid alt-names with >3 words
+		if len(n.split(" ")) > 3:
+			continue
+		# Avoid alt-names that already name a node in the database
+		if dbCur.execute("SELECT name FROM nodes WHERE name = ?", (n,)).fetchone() != None:
+			continue
+		# Check for picked alt-name-to-skip
+		if nodeName in nameToAltsToSkip and n in nameToAltsToSkip[nodeName]:
+			print(f"Excluding alt-name {n} for node {nodeName}")
+			continue
+		#
+		altNames.add(n)
+	# Add alt-names to db
+	preferredName = pidToPreferred[pidToUse] if (pidToUse in pidToPreferred) else None
+	for n in altNames:
+		isPreferred = 1 if (n == preferredName) else 0
+		dbCur.execute("INSERT INTO names VALUES (?, ?, ?, 'eol')", (nodeName, n, isPreferred))
+print("Adding picked IDs")
+for (name, pid) in nameToPickedPid.items():
+	if pid != None:
+		addToDb(name, pid)
+		usedPids.add(pid)
+print("Associating nodes with canonical names")
+iterNum = 0
+for (nodeName,) in dbCur2.execute("SELECT name FROM nodes"):
+	iterNum += 1
+	if iterNum % 1e5 == 0:
+		print(f"At iteration {iterNum}")
+	if nodeName in nameToPickedPid:
+		continue
+	# Check for matching canonical name
+	if nodeName in canonicalNameToPids:
+		pidToUse = None
+		# Pick an associated page ID
+		for pid in canonicalNameToPids[nodeName]:
+			hasLowerPrio = pid not in pidToPreferred and pidToUse in pidToPreferred
+			hasHigherPrio = pid in pidToPreferred and pidToUse not in pidToPreferred
+			if hasLowerPrio:
+				continue
+			if pid not in usedPids and (pidToUse == None or pid < pidToUse or hasHigherPrio):
+				pidToUse = pid
+		if pidToUse != None:
+			addToDb(nodeName, pidToUse)
+			usedPids.add(pidToUse)
+	elif nodeName in nameToPids:
+		unresolvedNodeNames.add(nodeName)
+print("Associating leftover nodes with other names")
+iterNum = 0
+for nodeName in unresolvedNodeNames:
+	iterNum += 1
+	if iterNum % 100 == 0:
+		print(f"At iteration {iterNum}")
+	# Check for matching name
+	pidToUse = None
+	for pid in nameToPids[nodeName]:
+		# Pick an associated page ID
+		if pid not in usedPids and (pidToUse == None or pid < pidToUse):
+			pidToUse = pid
+	if pidToUse != None:
+		addToDb(nodeName, pidToUse)
+		usedPids.add(pidToUse)
+
+print("Closing database")
+dbCon.commit()
+dbCon.close()
diff --git a/backend/tolData/genImgs.py b/backend/tolData/genImgs.py
new file mode 100755
index 0000000..ecca8e0
--- /dev/null
+++ b/backend/tolData/genImgs.py
@@ -0,0 +1,191 @@
+#!/usr/bin/python3
+
+import sys, os, subprocess
+import sqlite3, urllib.parse
+import signal
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads node IDs and image paths from a file, and possibly from a directory,
+and generates cropped/resized versions of those images into a directory,
+with names of the form 'nodeId1.jpg'. Also adds image metadata to the
+database.
+
+SIGINT can be used to stop, and the program can be re-run to continue
+processing. It uses already-existing database entries to decide what
+to skip.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+imgListFile = "imgList.txt"
+outDir = "img/"
+eolImgDb = "eol/imagesList.db"
+enwikiImgDb = "enwiki/imgData.db"
+pickedImgsDir = "pickedImgs/"
+pickedImgsFilename = "imgData.txt"
+dbFile = "data.db"
+IMG_OUT_SZ = 200
+genImgFiles = True # Usable for debugging
+
+if not os.path.exists(outDir):
+	os.mkdir(outDir)
+
+print("Opening databases")
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+eolCon = sqlite3.connect(eolImgDb)
+eolCur = eolCon.cursor()
+enwikiCon = sqlite3.connect(enwikiImgDb)
+enwikiCur = enwikiCon.cursor()
+print("Checking for picked-images")
+nodeToPickedImg = {}
+if os.path.exists(pickedImgsDir + pickedImgsFilename):
+	lineNum = 0
+	with open(pickedImgsDir + pickedImgsFilename) as file:
+		for line in file:
+			lineNum += 1
+			(filename, url, license, artist, credit) = line.rstrip().split("|")
+			nodeName = os.path.splitext(filename)[0] # Remove extension
+			(otolId,) = dbCur.execute("SELECT id FROM nodes WHERE name = ?", (nodeName,)).fetchone()
+			nodeToPickedImg[otolId] = {
+				"nodeName": nodeName, "id": lineNum,
+				"filename": filename, "url": url, "license": license, "artist": artist, "credit": credit,
+			}
+
+print("Checking for image tables")
+nodesDone = set()
+imgsDone = set()
+if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='node_imgs'").fetchone() == None:
+	# Add image tables if not present
+	dbCur.execute("CREATE TABLE node_imgs (name TEXT PRIMARY KEY, img_id INT, src TEXT)")
+	dbCur.execute("CREATE TABLE images" \
+		" (id INT, src TEXT, url TEXT, license TEXT, artist TEXT, credit TEXT, PRIMARY KEY (id, src))")
+else:
+	# Get existing image-associated nodes
+	for (otolId,) in dbCur.execute("SELECT nodes.id FROM node_imgs INNER JOIN nodes ON node_imgs.name = nodes.name"):
+		nodesDone.add(otolId)
+	# Get existing node-associated images
+	for (imgId, imgSrc) in dbCur.execute("SELECT id, src from images"):
+		imgsDone.add((imgId, imgSrc))
+	print(f"Found {len(nodesDone)} nodes and {len(imgsDone)} images to skip")
+
+# Set SIGINT handler
+interrupted = False
+def onSigint(sig, frame):
+	global interrupted
+	interrupted = True
+signal.signal(signal.SIGINT, onSigint)
+
+print("Iterating through input images")
+def quit():
+	print("Closing databases")
+	dbCon.commit()
+	dbCon.close()
+	eolCon.close()
+	enwikiCon.close()
+	sys.exit(0)
+def convertImage(imgPath, outPath):
+	print(f"Converting {imgPath} to {outPath}")
+	if os.path.exists(outPath):
+		print(f"ERROR: Output image already exists")
+		return False
+	try:
+		completedProcess = subprocess.run(
+			['npx', 'smartcrop-cli', '--width', str(IMG_OUT_SZ), '--height', str(IMG_OUT_SZ), imgPath, outPath],
+			stdout=subprocess.DEVNULL
+		)
+	except Exception as e:
+		print(f"ERROR: Exception while attempting to run smartcrop: {e}")
+		return False
+	if completedProcess.returncode != 0:
+		print(f"ERROR: smartcrop had exit status {completedProcess.returncode}")
+		return False
+	return True
+print("Processing picked-images")
+for (otolId, imgData) in nodeToPickedImg.items():
+	# Check for SIGINT event
+	if interrupted:
+		print("Exiting")
+		quit()
+	# Skip if already processed
+	if otolId in nodesDone:
+		continue
+	# Convert image
+	if genImgFiles:
+		success = convertImage(pickedImgsDir + imgData["filename"], outDir + otolId + ".jpg")
+		if not success:
+			quit()
+	else:
+		print(f"Processing {imgData['nodeName']}: {otolId}.jpg")
+	# Add entry to db
+	if (imgData["id"], "picked") not in imgsDone:
+		dbCur.execute("INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)",
+			(imgData["id"], "picked", imgData["url"], imgData["license"], imgData["artist"], imgData["credit"]))
+		imgsDone.add((imgData["id"], "picked"))
+	dbCur.execute("INSERT INTO node_imgs VALUES (?, ?, ?)", (imgData["nodeName"], imgData["id"], "picked"))
+	nodesDone.add(otolId)
+print("Processing images from eol and enwiki")
+iterNum = 0
+with open(imgListFile) as file:
+	for line in file:
+		iterNum += 1
+		# Check for SIGINT event
+		if interrupted:
+			print("Exiting")
+			break
+		# Skip lines without an image path
+		if line.find(" ") == -1:
+			continue
+		# Get filenames
+		(otolId, _, imgPath) = line.rstrip().partition(" ")
+		# Skip if already processed
+		if otolId in nodesDone:
+			continue
+		# Convert image
+		if genImgFiles:
+			success = convertImage(imgPath, outDir + otolId + ".jpg")
+			if not success:
+				break
+		else:
+			if iterNum % 1e4 == 0:
+				print(f"At iteration {iterNum}")
+		# Add entry to db
+		(nodeName,) = dbCur.execute("SELECT name FROM nodes WHERE id = ?", (otolId,)).fetchone()
+		fromEol = imgPath.startswith("eol/")
+		imgName = os.path.basename(os.path.normpath(imgPath)) # Get last path component
+		imgName = os.path.splitext(imgName)[0] # Remove extension
+		if fromEol:
+			eolId, _, contentId = imgName.partition(" ")
+			eolId, contentId = (int(eolId), int(contentId))
+			if (eolId, "eol") not in imgsDone:
+				query = "SELECT source_url, license, copyright_owner FROM images WHERE content_id = ?"
+				row = eolCur.execute(query, (contentId,)).fetchone()
+				if row == None:
+					print(f"ERROR: No image record for EOL ID {eolId}, content ID {contentId}")
+					break
+				(url, license, owner) = row
+				dbCur.execute("INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)",
+					(eolId, "eol", url, license, owner, ""))
+				imgsDone.add((eolId, "eol"))
+			dbCur.execute("INSERT INTO node_imgs VALUES (?, ?, ?)", (nodeName, eolId, "eol"))
+		else:
+			enwikiId = int(imgName)
+			if (enwikiId, "enwiki") not in imgsDone:
+				query = "SELECT name, license, artist, credit FROM" \
+					" page_imgs INNER JOIN imgs ON page_imgs.img_name = imgs.name" \
+					" WHERE page_imgs.page_id = ?"
+				row = enwikiCur.execute(query, (enwikiId,)).fetchone()
+				if row == None:
+					print(f"ERROR: No image record for enwiki ID {enwikiId}")
+					break
+				(name, license, artist, credit) = row
+				url = "https://en.wikipedia.org/wiki/File:" + urllib.parse.quote(name)
+				dbCur.execute("INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)",
+					(enwikiId, "enwiki", url, license, artist, credit))
+				imgsDone.add((enwikiId, "enwiki"))
+			dbCur.execute("INSERT INTO node_imgs VALUES (?, ?, ?)", (nodeName, enwikiId, "enwiki"))
+# Close dbs
+quit()
diff --git a/backend/tolData/genLinkedImgs.py b/backend/tolData/genLinkedImgs.py
new file mode 100755
index 0000000..a8e1322
--- /dev/null
+++ b/backend/tolData/genLinkedImgs.py
@@ -0,0 +1,125 @@
+#!/usr/bin/python3
+
+import sys, re
+import sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Look for nodes without images in the database, and tries to
+associate them with images from their children.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+dbFile = "data.db"
+compoundNameRegex = re.compile(r"\[(.+) \+ (.+)]")
+upPropagateCompoundImgs = False
+
+print("Opening databases")
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+dbCur.execute("CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, otol_ids TEXT)")
+
+print("Getting nodes with images")
+resolvedNodes = {} # Will map node names to otol IDs with a usable image
+query = "SELECT nodes.name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name"
+for (name, otolId) in dbCur.execute(query):
+	resolvedNodes[name] = otolId
+print(f"Found {len(resolvedNodes)}")
+
+print("Iterating through nodes, trying to resolve images for ancestors")
+nodesToResolve = {} # Maps a node name to a list of objects that represent possible child images
+processedNodes = {} # Map a node name to an OTOL ID, representing a child node whose image is to be used
+parentToChosenTips = {} # used to prefer images from children with more tips
+iterNum = 0
+while len(resolvedNodes) > 0:
+	iterNum += 1
+	if iterNum % 1e3 == 0:
+		print(f"At iteration {iterNum}")
+	# Get next node
+	(nodeName, otolId) = resolvedNodes.popitem()
+	processedNodes[nodeName] = otolId
+	# Traverse upwards, resolving ancestors if able
+	while True:
+		# Get parent
+		row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (nodeName,)).fetchone()
+		if row == None or row[0] in processedNodes or row[0] in resolvedNodes:
+			break
+		parent = row[0]
+		# Get parent data
+		if parent not in nodesToResolve:
+			childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE parent = ?", (parent,))]
+			query = "SELECT name, tips FROM nodes WHERE name IN ({})".format(",".join(["?"] * len(childNames)))
+			childObjs = [{"name": row[0], "tips": row[1], "otolId": None} for row in dbCur.execute(query, childNames)]
+			childObjs.sort(key=lambda x: x["tips"], reverse=True)
+			nodesToResolve[parent] = childObjs
+		else:
+			childObjs = nodesToResolve[parent]
+		# Check if highest-tips child
+		if (childObjs[0]["name"] == nodeName):
+			# Resolve parent, and continue from it
+			dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (parent, otolId))
+			del nodesToResolve[parent]
+			processedNodes[parent] = otolId
+			parentToChosenTips[parent] = childObjs[0]["tips"]
+			nodeName = parent
+			continue
+		else:
+			# Mark child as a potential choice
+			childObj = next(c for c in childObjs if c["name"] == nodeName)
+			childObj["otolId"] = otolId
+			break
+	# When out of resolved nodes, resolve nodesToResolve nodes, possibly adding more nodes to resolve
+	if len(resolvedNodes) == 0:
+		for (name, childObjs) in nodesToResolve.items():
+			childObj = next(c for c in childObjs if c["otolId"] != None)
+			resolvedNodes[name] = childObj["otolId"]
+			parentToChosenTips[name] = childObj["tips"]
+			dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (name, childObj["otolId"]))
+		nodesToResolve.clear()
+
+print("Replacing linked-images for compound nodes")
+iterNum = 0
+for nodeName in processedNodes.keys():
+	iterNum += 1
+	if iterNum % 1e4 == 0:
+		print(f"At iteration {iterNum}")
+	#
+	match = compoundNameRegex.fullmatch(nodeName)
+	if match != None:
+		# Replace associated image with subname images
+		(subName1, subName2) = match.group(1,2)
+		otolIdPair = ["", ""]
+		if subName1 in processedNodes:
+			otolIdPair[0] = processedNodes[subName1]
+		if subName2 in processedNodes:
+			otolIdPair[1] = processedNodes[subName2]
+		# Use no image if both subimages not found
+		if otolIdPair[0] == "" and otolIdPair[1] == "":
+			dbCur.execute("DELETE FROM linked_imgs WHERE name = ?", (nodeName,))
+			continue
+		# Add to db
+		dbCur.execute("UPDATE linked_imgs SET otol_ids = ? WHERE name = ?",
+			(otolIdPair[0] + "," + otolIdPair[1], nodeName))
+		# Possibly repeat operation upon parent/ancestors
+		if upPropagateCompoundImgs:
+			while True:
+				# Get parent
+				row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (nodeName,)).fetchone()
+				if row != None:
+					parent = row[0]
+					# Check num tips
+					(numTips,) = dbCur.execute("SELECT tips from nodes WHERE name = ?", (nodeName,)).fetchone()
+					if parent in parentToChosenTips and parentToChosenTips[parent] <= numTips:
+						# Replace associated image
+						dbCur.execute("UPDATE linked_imgs SET otol_ids = ? WHERE name = ?",
+							(otolIdPair[0] + "," + otolIdPair[1], parent))
+						nodeName = parent
+						continue
+				break
+
+print("Closing databases")
+dbCon.commit()
+dbCon.close()
diff --git a/backend/tolData/genOtolData.py b/backend/tolData/genOtolData.py
new file mode 100755
index 0000000..b5e0055
--- /dev/null
+++ b/backend/tolData/genOtolData.py
@@ -0,0 +1,250 @@
+#!/usr/bin/python3
+
+import sys, re, os
+import json, sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Reads files describing a tree-of-life from an 'Open Tree of Life' release,
+and stores tree information in a database.
+
+Reads a labelled_supertree_ottnames.tre file, which is assumed to have this format:
+    The tree-of-life is represented in Newick format, which looks like: (n1,n2,(n3,n4)n5)n6
+		The root node is named n6, and has children n1, n2, and n5.
+    Name examples include: Homo_sapiens_ott770315, mrcaott6ott22687, 'Oxalis san-miguelii ott5748753', 
+		'ott770315' and 'mrcaott6ott22687' are node IDs. The latter is for a 'compound node'.
+		The node with ID 'ott770315' will get the name 'homo sapiens'.
+		A compound node will get a name composed from it's sub-nodes (eg: [name1 + name2]).
+	It is possible for multiple nodes to have the same name.
+		In these cases, extra nodes will be named sequentially, as 'name1 [2]', 'name1 [3]', etc.
+Reads an annotations.json file, which is assumed to have this format:
+    Holds a JSON object, whose 'nodes' property maps node IDs to objects holding information about that node,
+    such as the properties 'supported_by' and 'conflicts_with', which list phylogenetic trees that
+	support/conflict with the node's placement.
+Reads from a picked-names file, if present, which specifies name and node ID pairs.
+	These help resolve cases where multiple nodes share the same name.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+treeFile = "otol/labelled_supertree_ottnames.tre" # Had about 2.5e9 nodes
+annFile = "otol/annotations.json"
+dbFile = "data.db"
+nodeMap = {} # Maps node IDs to node objects
+nameToFirstId = {} # Maps node names to first found ID (names might have multiple IDs)
+dupNameToIds = {} # Maps names of nodes with multiple IDs to those IDs
+pickedNamesFile = "pickedOtolNames.txt"
+
+class Node:
+	" Represents a tree-of-life node "
+	def __init__(self, name, childIds, parentId, tips, pSupport):
+		self.name = name
+		self.childIds = childIds
+		self.parentId = parentId
+		self.tips = tips
+		self.pSupport = pSupport
+
+print("Parsing tree file")
+# Read file
+data = None
+with open(treeFile) as file:
+	data = file.read()
+dataIdx = 0
+# Parse content
+iterNum = 0
+def parseNewick():
+	" Parses a node using 'data' and 'dataIdx', updates nodeMap accordingly, and returns the node's ID "
+	global data, dataIdx, iterNum
+	iterNum += 1
+	if iterNum % 1e5 == 0:
+		print(f"At iteration {iterNum}")
+	# Check for EOF
+	if dataIdx == len(data):
+		raise Exception(f"ERROR: Unexpected EOF at index {dataIdx}")
+	# Check for node
+	if data[dataIdx] == "(": # parse inner node
+		dataIdx += 1
+		childIds = []
+		while True:
+			# Read child
+			childId = parseNewick()
+			childIds.append(childId)
+			if (dataIdx == len(data)):
+				raise Exception(f"ERROR: Unexpected EOF at index {dataIdx}")
+			# Check for next child
+			if (data[dataIdx] == ","):
+				dataIdx += 1
+				continue
+			else:
+				# Get node name and id
+				dataIdx += 1 # Consume an expected ')'
+				name, id = parseNewickName()
+				updateNameMaps(name, id)
+				# Get child num-tips total
+				tips = 0
+				for childId in childIds:
+					tips += nodeMap[childId].tips
+				# Add node to nodeMap
+				nodeMap[id] = Node(name, childIds, None, tips, False)
+				# Update childrens' parent reference
+				for childId in childIds:
+					nodeMap[childId].parentId = id
+				return id
+	else: # Parse node name
+		name, id = parseNewickName()
+		updateNameMaps(name, id)
+		nodeMap[id] = Node(name, [], None, 1, False)
+		return id
+def parseNewickName():
+	" Parses a node name using 'data' and 'dataIdx', and returns a (name, id) pair "
+	global data, dataIdx
+	name = None
+	end = dataIdx
+	# Get name
+	if (end < len(data) and data[end] == "'"): # Check for quoted name
+		end += 1
+		inQuote = True
+		while end < len(data):
+			if (data[end] == "'"):
+				if end + 1 < len(data) and data[end + 1] == "'": # Account for '' as escaped-quote
+					end += 2
+					continue
+				else:
+					end += 1
+					inQuote = False
+					break
+			end += 1
+		if inQuote:
+			raise Exception(f"ERROR: Unexpected EOF at index {dataIdx}")
+		name = data[dataIdx:end]
+		dataIdx = end
+	else:
+		while end < len(data) and not re.match(r"[(),]", data[end]):
+			end += 1
+		if (end == dataIdx):
+			raise Exception(f"ERROR: Unexpected EOF at index {dataIdx}")
+		name = data[dataIdx:end].rstrip()
+		if end == len(data): # Ignore trailing input semicolon
+			name = name[:-1]
+		dataIdx = end
+	# Convert to (name, id)
+	name = name.lower()
+	if name.startswith("mrca"):
+		return (name, name)
+	elif name[0] == "'":
+		match = re.fullmatch(r"'([^\\\"]+) (ott\d+)'", name)
+		if match == None:
+			raise Exception(f"ERROR: invalid name \"{name}\"")
+		name = match.group(1).replace("''", "'")
+		return (name, match.group(2))
+	else:
+		match = re.fullmatch(r"([^\\\"]+)_(ott\d+)", name)
+		if match == None:
+			raise Exception(f"ERROR: invalid name \"{name}\"")
+		return (match.group(1).replace("_", " "), match.group(2))
+def updateNameMaps(name, id):
+	global nameToFirstId, dupNameToIds
+	if name not in nameToFirstId:
+		nameToFirstId[name] = id
+	else:
+		if name not in dupNameToIds:
+			dupNameToIds[name] = [nameToFirstId[name], id]
+		else:
+			dupNameToIds[name].append(id)
+rootId = parseNewick()
+
+print("Resolving duplicate names")
+# Read picked-names file
+nameToPickedId = {}
+if os.path.exists(pickedNamesFile):
+	with open(pickedNamesFile) as file:
+		for line in file:
+			(name, _, otolId) = line.rstrip().partition("|")
+			nameToPickedId[name] = otolId
+# Resolve duplicates
+for (dupName, ids) in dupNameToIds.items():
+	# Check for picked id
+	if dupName in nameToPickedId:
+		idToUse = nameToPickedId[dupName]
+	else:
+		# Get conflicting node with most tips
+		tipNums = [nodeMap[id].tips for id in ids]
+		maxIdx = tipNums.index(max(tipNums))
+		idToUse = ids[maxIdx]
+	# Adjust name of other conflicting nodes
+	counter = 2
+	for id in ids:
+		if id != idToUse:
+			nodeMap[id].name += f" [{counter}]"
+			counter += 1
+
+print("Changing mrca* names")
+def convertMrcaName(id):
+	" Renames an mrca* node to '[name1 + name2]', using its 2 children with the most tips, and returns name1 "
+	node = nodeMap[id]
+	name, childIds = node.name, node.childIds
+	if len(childIds) < 2:
+		print(f"WARNING: MRCA node \"{name}\" has less than 2 children")
+		return name # Leave the name as-is, but return a string, as recursive callers pass the result to re.fullmatch()
+	# Get 2 children with most tips
+	childTips = [nodeMap[id].tips for id in childIds]
+	maxIdx1 = childTips.index(max(childTips))
+	childTips[maxIdx1] = 0 # Exclude the first pick when searching for the second
+	maxIdx2 = childTips.index(max(childTips))
+	childId1 = childIds[maxIdx1]
+	childId2 = childIds[maxIdx2]
+	childName1 = nodeMap[childId1].name
+	childName2 = nodeMap[childId2].name
+	# Check for mrca* child names (converts those children first, and uses their leading sub-name)
+	if childName1.startswith("mrca"):
+		childName1 = convertMrcaName(childId1)
+	if childName2.startswith("mrca"):
+		childName2 = convertMrcaName(childId2)
+	# Check for composite names (already-converted children contribute only their first sub-name)
+	match = re.fullmatch(r"\[(.+) \+ (.+)]", childName1)
+	if match != None:
+		childName1 = match.group(1)
+	match = re.fullmatch(r"\[(.+) \+ (.+)]", childName2)
+	if match != None:
+		childName2 = match.group(1)
+	# Create composite name
+	node.name = f"[{childName1} + {childName2}]"
+	return childName1
+for (id, node) in nodeMap.items():
+	if node.name.startswith("mrca"):
+		convertMrcaName(id)
+
+print("Parsing annotations file")
+# Read file
+data = None
+with open(annFile) as file:
+	data = file.read()
+obj = json.loads(data)
+nodeAnnsMap = obj["nodes"]
+# Find relevant annotations
+for (id, node) in nodeMap.items():
+	# Set has-support value using annotations
+	if id in nodeAnnsMap:
+		nodeAnns = nodeAnnsMap[id]
+		supportQty = len(nodeAnns["supported_by"]) if "supported_by" in nodeAnns else 0
+		conflictQty = len(nodeAnns["conflicts_with"]) if "conflicts_with" in nodeAnns else 0
+		node.pSupport = supportQty > 0 and conflictQty == 0
+
+print("Creating nodes and edges tables")
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+dbCur.execute("CREATE TABLE nodes (name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT)")
+dbCur.execute("CREATE INDEX nodes_idx_nc ON nodes(name COLLATE NOCASE)")
+dbCur.execute("CREATE TABLE edges (parent TEXT, child TEXT, p_support INT, PRIMARY KEY (parent, child))")
+dbCur.execute("CREATE INDEX edges_child_idx ON edges(child)")
+for (otolId, node) in nodeMap.items():
+	dbCur.execute("INSERT INTO nodes VALUES (?, ?, ?)", (node.name, otolId, node.tips))
+	for childId in node.childIds:
+		childNode = nodeMap[childId]
+		dbCur.execute("INSERT INTO edges VALUES (?, ?, ?)",
+			(node.name, childNode.name, 1 if childNode.pSupport else 0))
+print("Closing database")
+dbCon.commit()
+dbCon.close()
diff --git a/backend/tolData/genReducedTrees.py b/backend/tolData/genReducedTrees.py
new file mode 100755
index 0000000..a921be4
--- /dev/null
+++ b/backend/tolData/genReducedTrees.py
@@ -0,0 +1,329 @@
+#!/usr/bin/python3
+
+import sys, os.path, re
+import json, sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]} [tree1]
+
+Creates reduced versions of the tree in the database:
+- A 'picked nodes' tree:
+    Created from a minimal set of node names read from a file,
+    possibly with some extra randmly-picked children.
+- An 'images only' tree:
+    Created by removing nodes without an image or presence in the
+    'picked' tree.
+- A 'weakly trimmed' tree:
+    Created by removing nodes that lack an image or description, or
+    presence in the 'picked' tree. And, for nodes with 'many' children,
+    removing some more, despite any node descriptions.
+
+If tree1 is specified, as 'picked', 'images', or 'trimmed', only that
+tree is generated.
+"""
+if len(sys.argv) > 2 or len(sys.argv) == 2 and re.fullmatch(r"picked|images|trimmed", sys.argv[1]) == None:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+tree = sys.argv[1] if len(sys.argv) > 1 else None
+dbFile = "data.db"
+pickedNodesFile = "pickedNodes.txt"
+COMP_NAME_REGEX = re.compile(r"\[.+ \+ .+]") # Used to recognise composite nodes
+
+class Node:
+	def __init__(self, id, children, parent, tips, pSupport):
+		self.id = id
+		self.children = children
+		self.parent = parent
+		self.tips = tips
+		self.pSupport = pSupport
+
+print("Opening database")
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+
+def genPickedNodeTree(dbCur, pickedNames, rootName):
+	global COMP_NAME_REGEX
+	PREF_NUM_CHILDREN = 3 # Include extra children up to this limit
+	nodeMap = {} # Maps node names to Nodes
+	print("Getting ancestors")
+	nodeMap = genNodeMap(dbCur, pickedNames, 100)
+	print(f"Result has {len(nodeMap)} nodes")
+	print("Removing composite nodes")
+	removedNames = removeCompositeNodes(nodeMap)
+	print(f"Result has {len(nodeMap)} nodes")
+	print("Removing 'collapsible' nodes")
+	temp = removeCollapsibleNodes(nodeMap, pickedNames)
+	removedNames.update(temp)
+	print(f"Result has {len(nodeMap)} nodes")
+	print("Adding some additional nearby children")
+	namesToAdd = []
+	iterNum = 0
+	for (name, node) in nodeMap.items():
+		iterNum += 1
+		if iterNum % 100 == 0:
+			print(f"At iteration {iterNum}")
+		#
+		numChildren = len(node.children)
+		if numChildren < PREF_NUM_CHILDREN:
+			children = [row[0] for row in dbCur.execute("SELECT child FROM edges where parent = ?", (name,))]
+			newChildren = []
+			for n in children:
+				if n in nodeMap or n in removedNames:
+					continue
+				if COMP_NAME_REGEX.fullmatch(n) != None:
+					continue
+				if dbCur.execute("SELECT name from node_imgs WHERE name = ?", (n,)).fetchone() == None and \
+					dbCur.execute("SELECT name from linked_imgs WHERE name = ?", (n,)).fetchone() == None:
+					continue
+				newChildren.append(n)
+			newChildNames = newChildren[:(PREF_NUM_CHILDREN - numChildren)]
+			node.children.extend(newChildNames)
+			namesToAdd.extend(newChildNames)
+	for name in namesToAdd:
+		parent, pSupport = dbCur.execute("SELECT parent, p_support from edges WHERE child = ?", (name,)).fetchone()
+		(id,) = dbCur.execute("SELECT id FROM nodes WHERE name = ?", (name,)).fetchone()
+		parent = None if parent == "" else parent
+		nodeMap[name] = Node(id, [], parent, 0, pSupport == 1)
+	print(f"Result has {len(nodeMap)} nodes")
+	print("Updating 'tips' values")
+	updateTips(rootName, nodeMap)
+	print("Creating table")
+	addTreeTables(nodeMap, dbCur, "p")
+def genImagesOnlyTree(dbCur, nodesWithImgOrPicked, pickedNames, rootName):
+	print("Getting ancestors")
+	nodeMap = genNodeMap(dbCur, nodesWithImgOrPicked, 1e4)
+	print(f"Result has {len(nodeMap)} nodes")
+	print("Removing composite nodes")
+	removeCompositeNodes(nodeMap)
+	print(f"Result has {len(nodeMap)} nodes")
+	print("Removing 'collapsible' nodes")
+	removeCollapsibleNodes(nodeMap, {})
+	print(f"Result has {len(nodeMap)} nodes")
+	print(f"Updating 'tips' values") # Needed for next trimming step
+	updateTips(rootName, nodeMap)
+	print(f"Trimming from nodes with 'many' children")
+	trimIfManyChildren(nodeMap, rootName, 300, pickedNames)
+	print(f"Result has {len(nodeMap)} nodes")
+	print(f"Updating 'tips' values")
+	updateTips(rootName, nodeMap)
+	print("Creating table")
+	addTreeTables(nodeMap, dbCur, "i")
+def genWeaklyTrimmedTree(dbCur, nodesWithImgDescOrPicked, nodesWithImgOrPicked, rootName):
+	print("Getting ancestors")
+	nodeMap = genNodeMap(dbCur, nodesWithImgDescOrPicked, 1e5)
+	print(f"Result has {len(nodeMap)} nodes")
+	print("Getting nodes to 'strongly keep'")
+	iterNum = 0
+	nodesFromImgOrPicked = set()
+	for name in nodesWithImgOrPicked:
+		iterNum += 1
+		if iterNum % 1e4 == 0:
+			print(f"At iteration {iterNum}")
+		#
+		while name != None:
+			if name not in nodesFromImgOrPicked:
+				nodesFromImgOrPicked.add(name)
+				name = nodeMap[name].parent
+			else:
+				break
+	print(f"Node set has {len(nodesFromImgOrPicked)} nodes")
+	print("Removing 'collapsible' nodes")
+	removeCollapsibleNodes(nodeMap, nodesWithImgDescOrPicked)
+	print(f"Result has {len(nodeMap)} nodes")
+	print(f"Updating 'tips' values") # Needed for next trimming step
+	updateTips(rootName, nodeMap)
+	print(f"Trimming from nodes with 'many' children")
+	trimIfManyChildren(nodeMap, rootName, 600, nodesFromImgOrPicked)
+	print(f"Result has {len(nodeMap)} nodes")
+	print(f"Updating 'tips' values")
+	updateTips(rootName, nodeMap)
+	print("Creating table")
+	addTreeTables(nodeMap, dbCur, "t")
+# Helper functions
+def genNodeMap(dbCur, nameSet, itersBeforePrint = 1):
+	" Returns a subtree that includes nodes in 'nameSet', as a name-to-Node map "
+	nodeMap = {}
+	iterNum = 0
+	for name in nameSet:
+		iterNum += 1
+		if iterNum % itersBeforePrint == 0:
+			print(f"At iteration {iterNum}")
+		#
+		prevName = None
+		while name != None:
+			if name not in nodeMap:
+				# Add node
+				(id, tips) = dbCur.execute("SELECT id, tips from nodes where name = ?", (name,)).fetchone()
+				row = dbCur.execute("SELECT parent, p_support from edges where child = ?", (name,)).fetchone()
+				parent = None if row == None or row[0] == "" else row[0]
+				pSupport = row == None or row[1] == 1
+				children = [] if prevName == None else [prevName]
+				nodeMap[name] = Node(id, children, parent, 0, pSupport)
+				# Iterate to parent
+				prevName = name
+				name = parent
+			else:
+				# Just add as child
+				if prevName != None:
+					nodeMap[name].children.append(prevName)
+				break
+	return nodeMap
+def removeCompositeNodes(nodeMap):
+	" Given a tree, removes composite-name nodes, and returns the removed nodes' names "
+	global COMP_NAME_REGEX
+	namesToRemove = set()
+	for (name, node) in nodeMap.items():
+		parent = node.parent
+		if parent != None and COMP_NAME_REGEX.fullmatch(name) != None:
+			# Connect children to parent
+			nodeMap[parent].children.remove(name)
+			nodeMap[parent].children.extend(node.children)
+			for n in node.children:
+				nodeMap[n].parent = parent
+				nodeMap[n].pSupport &= node.pSupport
+			# Remember for removal
+			namesToRemove.add(name)
+	for name in namesToRemove:
+		del nodeMap[name]
+	return namesToRemove
+def removeCollapsibleNodes(nodeMap, nodesToKeep = {}):
+	""" Given a tree, removes single-child parents, then only-childs, with given exceptions (the root
+		is never removed), and returns the set of names of all nodes removed by either pass """
+	namesToRemove = set()
+	# Remove single-child parents
+	for (name, node) in nodeMap.items():
+		if len(node.children) == 1 and node.parent != None and name not in nodesToKeep:
+			# Connect parent and children
+			parent = node.parent
+			child = node.children[0]
+			nodeMap[parent].children.remove(name)
+			nodeMap[parent].children.append(child)
+			nodeMap[child].parent = parent
+			nodeMap[child].pSupport &= node.pSupport
+			# Remember for removal
+			namesToRemove.add(name)
+	for name in namesToRemove:
+		del nodeMap[name]
+	# Remove only-childs (not redundant because 'nodesToKeep' can cause single-child parents to be kept)
+	removedParents, namesToRemove = namesToRemove, set() # Keep first-pass names, so they are also returned
+	for (name, node) in nodeMap.items():
+		isOnlyChild = node.parent != None and len(nodeMap[node.parent].children) == 1
+		if isOnlyChild and name not in nodesToKeep:
+			# Connect parent and children
+			parent = node.parent
+			nodeMap[parent].children = node.children
+			for n in node.children:
+				nodeMap[n].parent = parent
+				nodeMap[n].pSupport &= node.pSupport
+			# Remember for removal
+			namesToRemove.add(name)
+	for name in namesToRemove:
+		del nodeMap[name]
+	# Report the nodes removed by both passes
+	return removedParents | namesToRemove
+def trimIfManyChildren(nodeMap, rootName, childThreshold, nodesToKeep = {}):
+	namesToRemove = set()
+	def findTrimmables(nodeName):
+		nonlocal nodeMap, nodesToKeep
+		node = nodeMap[nodeName]
+		if len(node.children) > childThreshold:
+			numToTrim = len(node.children) - childThreshold
+			# Try removing nodes, preferring those with less tips
+			candidatesToTrim = [n for n in node.children if n not in nodesToKeep]
+			childToTips = {n: nodeMap[n].tips for n in candidatesToTrim}
+			candidatesToTrim.sort(key=lambda n: childToTips[n], reverse=True)
+			childrenToRemove = set(candidatesToTrim[-numToTrim:])
+			node.children = [n for n in node.children if n not in childrenToRemove]
+			# Mark nodes for deletion
+			for n in childrenToRemove:
+				markForRemoval(n)
+		# Recurse on children
+		for n in node.children:
+			findTrimmables(n)
+	def markForRemoval(nodeName):
+		nonlocal nodeMap, namesToRemove
+		namesToRemove.add(nodeName)
+		for child in nodeMap[nodeName].children:
+			markForRemoval(child)
+	findTrimmables(rootName)
+	for nodeName in namesToRemove:
+		del nodeMap[nodeName]
+def updateTips(nodeName, nodeMap):
+	" Updates the 'tips' values for a node and its descendants, returning the node's new 'tips' value "
+	node = nodeMap[nodeName]
+	tips = sum([updateTips(childName, nodeMap) for childName in node.children]) # Recursively totals the children's tips
+	tips = max(1, tips) # A node without children counts as one tip
+	node.tips = tips
+	return tips
+def addTreeTables(nodeMap, dbCur, suffix):
+	" Adds a tree to the database, as tables nodes_X and edges_X, where X is the given suffix "
+	nodesTbl = f"nodes_{suffix}"
+	edgesTbl = f"edges_{suffix}"
+	dbCur.execute(f"CREATE TABLE {nodesTbl} (name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT)")
+	dbCur.execute(f"CREATE INDEX {nodesTbl}_idx_nc ON {nodesTbl}(name COLLATE NOCASE)")
+	dbCur.execute(f"CREATE TABLE {edgesTbl} (parent TEXT, child TEXT, p_support INT, PRIMARY KEY (parent, child))")
+	dbCur.execute(f"CREATE INDEX {edgesTbl}_child_idx ON {edgesTbl}(child)")
+	for (name, node) in nodeMap.items():
+		dbCur.execute(f"INSERT INTO {nodesTbl} VALUES (?, ?, ?)", (name, node.id, node.tips))
+		for childName in node.children:
+			pSupport = 1 if nodeMap[childName].pSupport else 0
+			dbCur.execute(f"INSERT INTO {edgesTbl} VALUES (?, ?, ?)", (name, childName, pSupport))
+
+print(f"Finding root node")
+query = "SELECT name FROM nodes LEFT JOIN edges ON nodes.name = edges.child WHERE edges.parent IS NULL LIMIT 1"
+(rootName,) = dbCur.execute(query).fetchone()
+print(f"Found \"{rootName}\"")
+
+print('=== Getting picked-nodes ===')
+pickedNames = set()
+pickedTreeExists = False
+if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='nodes_p'").fetchone() == None:
+	print(f"Reading from {pickedNodesFile}")
+	with open(pickedNodesFile) as file:
+		for line in file:
+			name = line.rstrip()
+			row = dbCur.execute("SELECT name from nodes WHERE name = ?", (name,)).fetchone()
+			if row == None:
+				row = dbCur.execute("SELECT name from names WHERE alt_name = ?", (name,)).fetchone()
+			if row != None:
+				pickedNames.add(row[0])
+	if len(pickedNames) == 0:
+		raise Exception("ERROR: No picked names found")
+else:
+	pickedTreeExists = True
+	print("Picked-node tree already exists")
+	if tree == 'picked':
+		sys.exit()
+	for (name,) in dbCur.execute("SELECT name FROM nodes_p"):
+		pickedNames.add(name)
+print(f"Found {len(pickedNames)} names")
+
+if (tree == 'picked' or tree == None) and not pickedTreeExists:
+	print("=== Generating picked-nodes tree ===")
+	genPickedNodeTree(dbCur, pickedNames, rootName)
+if tree != 'picked':
+	print("=== Finding 'non-low significance' nodes ===")
+	nodesWithImgOrPicked = set()
+	nodesWithImgDescOrPicked = set()
+	print("Finding nodes with descs")
+	for (name,) in dbCur.execute("SELECT name FROM wiki_ids"): # Can assume the wiki_id has a desc
+		nodesWithImgDescOrPicked.add(name)
+	print("Finding nodes with images")
+	for (name,) in dbCur.execute("SELECT name FROM node_imgs"):
+		nodesWithImgDescOrPicked.add(name)
+		nodesWithImgOrPicked.add(name)
+	print("Adding picked nodes")
+	for name in pickedNames:
+		nodesWithImgDescOrPicked.add(name)
+		nodesWithImgOrPicked.add(name)
+	if tree == 'images' or tree == None:
+		print("=== Generating images-only tree ===")
+		genImagesOnlyTree(dbCur, nodesWithImgOrPicked, pickedNames, rootName)
+	if tree == 'trimmed' or tree == None:
+		print("=== Generating weakly-trimmed tree ===")
+		genWeaklyTrimmedTree(dbCur, nodesWithImgDescOrPicked, nodesWithImgOrPicked, rootName)
+
+print("Closing database")
+dbCon.commit()
+dbCon.close()
diff --git a/backend/tolData/otol/README.md b/backend/tolData/otol/README.md
new file mode 100644
index 0000000..4be2fd2
--- /dev/null
+++ b/backend/tolData/otol/README.md
@@ -0,0 +1,10 @@
+Files
+=====
+-   opentree13.4tree.tgz <br>
+    Obtained from <https://tree.opentreeoflife.org/about/synthesis-release/v13.4>.
+    Contains tree data from the [Open Tree of Life](https://tree.opentreeoflife.org/about/open-tree-of-life).
+-   labelled\_supertree\_ottnames.tre <br>
+    Extracted from the .tgz file. Describes the structure of the tree.
+-   annotations.json <br>
+    Extracted from the .tgz file. Contains additional attributes of tree
+    nodes. Used for finding out which nodes have 'phylogenetic support'.
diff --git a/backend/tolData/pickedImgs/README.md b/backend/tolData/pickedImgs/README.md
new file mode 100644
index 0000000..dfe192b
--- /dev/null
+++ b/backend/tolData/pickedImgs/README.md
@@ -0,0 +1,10 @@
+This directory holds additional image files to use for tree-of-life nodes,
+on top of those from EOL and Wikipedia.
+
+Possible Files
+==============
+-   (Image files)
+-   imgData.txt <br>
+    Contains lines with the format `filename|url|license|artist|credit`.
+    The filename should consist of a node name, with an image extension.
+    Other fields correspond to those in the `images` table (see ../README.md).
diff --git a/backend/tolData/reviewImgsToGen.py b/backend/tolData/reviewImgsToGen.py
new file mode 100755
index 0000000..de592f5
--- /dev/null
+++ b/backend/tolData/reviewImgsToGen.py
@@ -0,0 +1,225 @@
+#!/usr/bin/python3
+
+import sys, re, os, time
+import sqlite3
+import tkinter as tki
+from tkinter import ttk
+import PIL
+from PIL import ImageTk, Image, ImageOps
+
+usageInfo = f"""
+Usage: {sys.argv[0]}
+
+Provides a GUI that displays, for each node in the database, associated
+images from EOL and Wikipedia, and allows choosing which to use. Writes
+choice data to a text file with lines of the form 'otolId1 imgPath1', or
+'otolId1', where no path indicates a choice of no image.
+
+The program can be closed, and run again to continue from the last choice.
+The program looks for an existing output file to determine what choices
+have already been made.
+"""
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
+
+eolImgDir = "eol/imgs/"
+enwikiImgDir = "enwiki/imgs/"
+dbFile = "data.db"
+outFile = "imgList.txt"
+IMG_DISPLAY_SZ = 400
+PLACEHOLDER_IMG = Image.new("RGB", (IMG_DISPLAY_SZ, IMG_DISPLAY_SZ), (88, 28, 135))
+onlyReviewPairs = True
+
+print("Opening database")
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+
+nodeToImgs = {} # Maps otol-ids to arrays of image paths
+print("Iterating through images from EOL")
+if os.path.exists(eolImgDir):
+	for filename in os.listdir(eolImgDir):
+		# Get associated EOL ID
+		eolId, _, _ = filename.partition(" ")
+		query = "SELECT nodes.id FROM nodes INNER JOIN eol_ids ON nodes.name = eol_ids.name WHERE eol_ids.id = ?"
+		# Get associated node IDs
+		found = False
+		for (otolId,) in dbCur.execute(query, (int(eolId),)):
+			if otolId not in nodeToImgs:
+				nodeToImgs[otolId] = []
+			nodeToImgs[otolId].append(eolImgDir + filename)
+			found = True
+		if not found:
+			print(f"WARNING: No node found for {eolImgDir}{filename}")
+print(f"Result: {len(nodeToImgs)} nodes with images")
+print("Iterating through images from Wikipedia")
+if os.path.exists(enwikiImgDir):
+	for filename in os.listdir(enwikiImgDir):
+		# Get associated page ID
+		(wikiId, _, _) = filename.partition(".")
+		# Get associated node IDs
+		query = "SELECT nodes.id FROM nodes INNER JOIN wiki_ids ON nodes.name = wiki_ids.name WHERE wiki_ids.id = ?"
+		found = False
+		for (otolId,) in dbCur.execute(query, (int(wikiId),)):
+			if otolId not in nodeToImgs:
+				nodeToImgs[otolId] = []
+			nodeToImgs[otolId].append(enwikiImgDir + filename)
+			found = True
+		if not found:
+			print(f"WARNING: No node found for {enwikiImgDir}{filename}")
+print(f"Result: {len(nodeToImgs)} nodes with images")
+print("Filtering out already-made image choices")
+oldSz = len(nodeToImgs)
+if os.path.exists(outFile):
+	with open(outFile) as file:
+		for line in file:
+			# Each line is 'otolId imgPath', or just 'otolId' if no image was chosen
+			choiceId, _, _ = line.rstrip().partition(" ")
+			# The ID may be absent (eg: its images were since removed), so avoid raising a KeyError
+			nodeToImgs.pop(choiceId, None)
+print(f"Filtered out {oldSz - len(nodeToImgs)} entries")
+
+class ImgReviewer:
+	" Provides the GUI for reviewing images "
+	def __init__(self, root, nodeToImgs):
+		self.root = root
+		root.title("Image Reviewer")
+		# Setup main frame
+		mainFrame = ttk.Frame(root, padding="5 5 5 5")
+		mainFrame.grid(column=0, row=0, sticky=(tki.N, tki.W, tki.E, tki.S))
+		root.columnconfigure(0, weight=1)
+		root.rowconfigure(0, weight=1)
+		# Set up images-to-be-reviewed frames
+		self.eolImg = ImageTk.PhotoImage(PLACEHOLDER_IMG)
+		self.enwikiImg = ImageTk.PhotoImage(PLACEHOLDER_IMG)
+		self.labels = []
+		for i in (0, 1):
+			frame = ttk.Frame(mainFrame, width=IMG_DISPLAY_SZ, height=IMG_DISPLAY_SZ)
+			frame.grid(column=i, row=0)
+			label = ttk.Label(frame, image=self.eolImg if i == 0 else self.enwikiImg)
+			label.grid(column=0, row=0)
+			self.labels.append(label)
+		# Add padding
+		for child in mainFrame.winfo_children():
+			child.grid_configure(padx=5, pady=5)
+		# Add keyboard bindings
+		root.bind("<q>", self.quit)
+		root.bind("<Key-j>", lambda evt: self.accept(0))
+		root.bind("<Key-k>", lambda evt: self.accept(1))
+		root.bind("<Key-l>", lambda evt: self.reject())
+		# Set fields
+		self.nodeImgsList = list(nodeToImgs.items())
+		self.listIdx = -1
+		self.otolId = None
+		self.eolImgPath = None
+		self.enwikiImgPath = None
+		self.numReviewed = 0
+		self.startTime = time.time()
+		# Initialise images to review
+		self.getNextImgs()
+	def getNextImgs(self):
+		" Updates display with new images to review, or ends program "
+		# Get next image paths
+		while True:
+			self.listIdx += 1
+			if self.listIdx == len(self.nodeImgsList):
+				print("No more images to review. Exiting program.")
+				self.quit()
+				return
+			self.otolId, imgPaths = self.nodeImgsList[self.listIdx]
+			# Potentially skip user choice (a lone image is written to the output file as the choice)
+			if onlyReviewPairs and len(imgPaths) == 1:
+				with open(outFile, 'a') as file:
+					file.write(f"{self.otolId} {imgPaths[0]}\n")
+				continue
+			break
+		# Update displayed images
+		self.eolImgPath = self.enwikiImgPath = None
+		imageOpenError = False
+		for imgPath in imgPaths:
+			try:
+				img = Image.open(imgPath)
+				img = ImageOps.exif_transpose(img)
+			except PIL.UnidentifiedImageError:
+				print(f"UnidentifiedImageError for {imgPath}")
+				imageOpenError = True
+				continue
+			if imgPath.startswith("eol/"):
+				self.eolImgPath = imgPath
+				self.eolImg = ImageTk.PhotoImage(self.resizeImgForDisplay(img))
+			elif imgPath.startswith("enwiki/"):
+				self.enwikiImgPath = imgPath
+				self.enwikiImg = ImageTk.PhotoImage(self.resizeImgForDisplay(img))
+			else:
+				print(f"Unexpected image path {imgPath}")
+				self.quit()
+				return
+		# Re-iterate if all image paths invalid
+		if self.eolImgPath == None and self.enwikiImgPath == None:
+			if imageOpenError:
+				self.reject() # Records a no-image choice, and itself advances to the next image set
+			else:
+				self.getNextImgs() # Only advance here if reject() didn't, to avoid skipping an image set
+			return
+		# Add placeholder images
+		if self.eolImgPath == None:
+			self.eolImg = ImageTk.PhotoImage(self.resizeImgForDisplay(PLACEHOLDER_IMG))
+		elif self.enwikiImgPath == None:
+			self.enwikiImg = ImageTk.PhotoImage(self.resizeImgForDisplay(PLACEHOLDER_IMG))
+		# Update image-frames
+		self.labels[0].config(image=self.eolImg)
+		self.labels[1].config(image=self.enwikiImg)
+		# Update title
+		title = f"Images for otol ID {self.otolId}"
+		query = "SELECT names.alt_name FROM" \
+			" nodes INNER JOIN names ON nodes.name = names.name" \
+			" WHERE nodes.id = ? and pref_alt = 1"
+		row = dbCur.execute(query, (self.otolId,)).fetchone()
+		if row != None:
+			title += f", aka {row[0]}"
+		title += f" ({self.listIdx + 1} out of {len(self.nodeImgsList)})"
+		self.root.title(title)
+	def accept(self, imgIdx):
+		" React to a user selecting an image "
+		imgPath = self.eolImgPath if imgIdx == 0 else self.enwikiImgPath
+		if imgPath == None:
+			print("Invalid selection")
+			return
+		with open(outFile, 'a') as file:
+			file.write(f"{self.otolId} {imgPath}\n")
+		self.numReviewed += 1
+		self.getNextImgs()
+	def reject(self):
+		" React to a user rejecting all images of a set "
+		with open(outFile, 'a') as file:
+			file.write(f"{self.otolId}\n")
+		self.numReviewed += 1
+		self.getNextImgs()
+	def quit(self, e = None):
+		global dbCon
+		print(f"Number reviewed: {self.numReviewed}")
+		timeElapsed = time.time() - self.startTime
+		print(f"Time elapsed: {timeElapsed:.2f} seconds")
+		if self.numReviewed > 0:
+			print(f"Avg time per review: {timeElapsed/self.numReviewed:.2f} seconds")
+		dbCon.close()
+		self.root.destroy()
+	def resizeImgForDisplay(self, img):
+		" Returns a copy of an image, shrunk to fit its frame (keeps aspect ratio), and with a background "
+		if max(img.width, img.height) > IMG_DISPLAY_SZ: # Only shrink, never enlarge
+			if (img.width > img.height):
+				newHeight = int(img.height * IMG_DISPLAY_SZ/img.width)
+				img = img.resize((IMG_DISPLAY_SZ, newHeight))
+			else:
+				newWidth = int(img.width * IMG_DISPLAY_SZ / img.height)
+				img = img.resize((newWidth, IMG_DISPLAY_SZ))
+		bgImg = PLACEHOLDER_IMG.copy() # Background has dimensions IMG_DISPLAY_SZ x IMG_DISPLAY_SZ
+		bgImg.paste(img, box=( # Centre the image on the background
+			int((IMG_DISPLAY_SZ - img.width) / 2),
+			int((IMG_DISPLAY_SZ - img.height) / 2)))
+		return bgImg
+# Create GUI and defer control
+print("Starting GUI")
+root = tki.Tk()
+ImgReviewer(root, nodeToImgs)
+root.mainloop()
diff --git a/src/lib.ts b/src/lib.ts
index d2ad959..c9570bc 100644
--- a/src/lib.ts
+++ b/src/lib.ts
@@ -7,11 +7,11 @@ import {LayoutOptions} from './layout';
 import {getBreakpoint, Breakpoint, getScrollBarWidth, onTouchDevice} from './util';
 
 // For server requests
-const SERVER_URL = (new URL(window.location.href)).origin + '/data'
-const SERVER_IMG_PATH = '/img/'
+const SERVER_DATA_URL = (new URL(window.location.href)).origin + '/data/'
+const SERVER_IMG_PATH = '/tolData/img/'
 export async function queryServer(params: URLSearchParams){
 	// Construct URL
-	let url = new URL(SERVER_URL);
+	let url = new URL(SERVER_DATA_URL);
 	url.search = params.toString();
 	// Query server
 	let responseObj;
diff --git a/vite.config.js b/vite.config.js
index 7352973..af88991 100644
--- a/vite.config.js
+++ b/vite.config.js
@@ -5,7 +5,7 @@ export default defineConfig({
 	base: '/',
 	plugins: [vue()],
 	server: {
-		proxy: {'/data': 'http://localhost:8000'},
+		proxy: {'/data': 'http://localhost:8000', '/tolData': 'http://localhost:8000', },
 		watch: {
 			ignored: ['**/backend', '**/public']
 		},
-- 
cgit v1.2.3