about | summary | refs | log | tree | commit | diff
path: root/backend
diff options
context:
space:
mode:
Diffstat (limited to 'backend')
-rw-r--r-- backend/data/README.md 4
-rwxr-xr-x backend/data/addPickedNames.py 48
-rw-r--r-- backend/data/enwiki/README.md 1
3 files changed, 52 insertions, 1 deletions
diff --git a/backend/data/README.md b/backend/data/README.md
index 18e5da3..d4a6196 100644
--- a/backend/data/README.md
+++ b/backend/data/README.md
@@ -58,6 +58,8 @@ File Generation Process
6 Other
- Optionally run genEnwikiNameData.py, which adds more entries to the 'names' table,
using data in enwiki/enwikiData.db, and the 'names' and 'wiki_ids' tables.
+ - Optionally run addPickedNames.py, which adds manually-picked names to
+ the 'names' table, as specified in pickedNames.txt.
- Optionally run trimTree.py, which tries to remove some 'low-significance' nodes,
for the sake of performance and result-relevance. Without this, jumping to certain
nodes within the fungi and moths can take over a minute to render.
@@ -72,7 +74,7 @@ data.db Tables
- descs: wiki\_id INT PRIMARY KEY, desc TEXT, from\_dbp INT
- node\_imgs: name TEXT PRIMARY KEY, img\_id INT, src TEXT
- images: id INT, src TEXT, url TEXT, license TEXT, artist TEXT, credit TEXT, PRIMARY KEY (id, src)
-- linked\_imgs: name TEXT PRIMARY KEY, otol\_id TEXT, otol\_id2 TEXT
+- linked\_imgs: name TEXT PRIMARY KEY, otol\_ids TEXT
- r\_nodes: name TEXT PRIMARY KEY, tips INT
- r\_edges: node TEXT, child TEXT, p\_support INT, PRIMARY KEY (node, child)
diff --git a/backend/data/addPickedNames.py b/backend/data/addPickedNames.py
new file mode 100755
index 0000000..b231ab7
--- /dev/null
+++ b/backend/data/addPickedNames.py
@@ -0,0 +1,48 @@
+#!/usr/bin/python3
+
+import sys
+import sqlite3
+
+usageInfo = f"usage: {sys.argv[0]}\n"
+usageInfo += "Reads alt-name data from a file, and adds it to the 'names' table.\n"
+usageInfo += "The file is expected to have lines of the form: nodeName|altName|prefAlt\n"
+usageInfo += " These correspond to entries in the 'names' table. 'prefAlt' should\n"
+usageInfo += " be 1 or 0. A line may specify name1|name1|1, which causes the node\n"
+usageInfo += " to have no preferred alt-name.\n"
+if len(sys.argv) > 1:
+ print(usageInfo, file=sys.stderr)
+ sys.exit(1)
+
+dbFile = "data.db"
+pickedNamesFile = "pickedNames.txt"
+
+# Open db
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+# Iterate through picked-names file
+with open(pickedNamesFile) as file:
+ for line in file:
+ # Get record data
+ (nodeName, altName, prefAlt) = line.lower().rstrip().split("|")
+ prefAlt = int(prefAlt)
+ # Remove any existing preferred-alt status
+ if prefAlt == 1:
+ query = "SELECT name, alt_name FROM names WHERE name = ? AND pref_alt = 1"
+ row = dbCur.execute(query, (nodeName,)).fetchone()
+ if row != None:
+ dbCur.execute("UPDATE names SET pref_alt = 0 WHERE name = ? AND alt_name = ?", row)
+ # Check for an existing record
+ if nodeName == altName:
+ continue
+ query = "SELECT name, alt_name, pref_alt FROM names WHERE name = ? AND alt_name = ?"
+ row = dbCur.execute(query, (nodeName, altName)).fetchone()
+ if row == None:
+ dbCur.execute("INSERT INTO names VALUES (?, ?, ?, 'picked')", (nodeName, altName, prefAlt))
+ else:
+ # Update existing record
+ if row[2] != prefAlt:
+ dbCur.execute("UPDATE names SET pref_alt = ?, src = 'picked' WHERE name = ? AND alt_name = ?",
+ (prefAlt, nodeName, altName))
+# Close db
+dbCon.commit()
+dbCon.close()
diff --git a/backend/data/enwiki/README.md b/backend/data/enwiki/README.md
index 22af5ba..6462d7d 100644
--- a/backend/data/enwiki/README.md
+++ b/backend/data/enwiki/README.md
@@ -34,5 +34,6 @@ Generated Files
file and dumpIndex.db. <br>
Tables: <br>
- page\_imgs: page\_id INT PRIMARY KEY, img\_name TEXT
+ (img\_name may be null, which is used to avoid re-processing the page-id on a second pass)
- imgs: name TEXT PRIMARY KEY, license TEXT, artist TEXT, credit TEXT, restrictions TEXT, url TEXT
(might lack some matches for 'img_name' in 'page_imgs', due to inability to get license info)