about summary refs log tree commit diff
path: root/backend/data
diff options
context:
space:
mode:
Diffstat (limited to 'backend/data')
-rw-r--r-- backend/data/README.md 2
-rwxr-xr-x backend/data/genEolNameData.py 22
-rwxr-xr-x backend/data/reviewImgs.py 17
3 files changed, 31 insertions, 10 deletions
diff --git a/backend/data/README.md b/backend/data/README.md
index 3cbeb03..88053b5 100644
--- a/backend/data/README.md
+++ b/backend/data/README.md
@@ -8,6 +8,6 @@ File Generation Process
4 Use downloadImgsForReview.py to download EOL images into imgsForReview/.
It uses data in eol/imagesList.db, and the 'nodes' table.
5 Use reviewImgs.py to filter images in imgsForReview/ into EOL-id-unique
- images in imgsReviewed/.
+ images in imgsReviewed/ (uses 'names' to display common names).
6 Use genImgsForWeb.py to create cropped/resized images in img/, using
images in imgsReviewed, and also to add an 'images' table to data.db.
diff --git a/backend/data/genEolNameData.py b/backend/data/genEolNameData.py
index ce887b3..5070fd7 100755
--- a/backend/data/genEolNameData.py
+++ b/backend/data/genEolNameData.py
@@ -23,7 +23,8 @@ dbFile = "data.db"
nameToPids = {}
pidToNames = {}
canonicalNameToPids = {}
-def updateMaps(name, pid, canonical):
+pidToPreferred = {}
+def updateMaps(name, pid, canonical, preferredAlt):
if name not in nameToPids:
nameToPids[name] = {pid}
else:
@@ -37,6 +38,8 @@ def updateMaps(name, pid, canonical):
pidToNames[pid] = {name}
else:
pidToNames[pid].add(name)
+ if preferredAlt:
+ pidToPreferred[pid] = name
with open(vnamesFile, newline="") as csvfile:
reader = csv.reader(csvfile)
lineNum = 0
@@ -47,14 +50,15 @@ with open(vnamesFile, newline="") as csvfile:
pid = int(row[0])
name1 = re.sub(r"<[^>]+>", "", row[1].lower()) # Remove tags
name2 = row[2].lower()
+ preferred = row[6] == "preferred" and row[3] == "eng"
# Add to maps
- updateMaps(name1, pid, True)
- updateMaps(name2, pid, False)
+ updateMaps(name1, pid, True, False)
+ updateMaps(name2, pid, False, preferred)
# Open db connection
dbCon = sqlite3.connect(dbFile)
-cur = dbCon.cursor()
+dbCur = dbCon.cursor()
# Create 'names' table
-cur.execute("CREATE TABLE names(name TEXT, alt_name TEXT, eol_id INT, PRIMARY KEY(name, alt_name))")
+dbCur.execute("CREATE TABLE names(name TEXT, alt_name TEXT, eol_id INT, pref_alt INT, PRIMARY KEY(name, alt_name))")
# Iterate through 'nodes' table, resolving to canonical-names
usedPids = set()
unresolvedNodeNames = set()
@@ -75,10 +79,12 @@ for row in cur2.execute("SELECT name FROM nodes"):
if pidToUse > 0:
usedPids.add(pidToUse)
altNames = {name}
+ preferredName = pidToPreferred[pidToUse] if (pidToUse in pidToPreferred) else None
for n in pidToNames[pidToUse]:
altNames.add(n)
for n in altNames:
- cur.execute("INSERT INTO names VALUES (?, ?, ?)", (name, n, pidToUse))
+ isPreferred = 1 if (n == preferredName) else 0
+ dbCur.execute("INSERT INTO names VALUES (?, ?, ?, ?)", (name, n, pidToUse, isPreferred))
elif name in nameToPids:
unresolvedNodeNames.add(name)
# Iterate through unresolved nodes, resolving to vernacular-names
@@ -96,9 +102,11 @@ for name in unresolvedNodeNames:
if pidToUse > 0:
usedPids.add(pidToUse)
altNames = {name}
+ preferredName = pidToPreferred[pidToUse] if (pidToUse in pidToPreferred) else None
for n in pidToNames[pidToUse]:
altNames.add(n)
for n in altNames:
- cur.execute("INSERT INTO names VALUES (?, ?, ?)", (name, n, pidToUse))
+ isPreferred = 1 if (n == preferredName) else 0
+ dbCur.execute("INSERT INTO names VALUES (?, ?, ?, ?)", (name, n, pidToUse, isPreferred))
dbCon.commit()
dbCon.close()
diff --git a/backend/data/reviewImgs.py b/backend/data/reviewImgs.py
index 3df7809..5dcd52e 100755
--- a/backend/data/reviewImgs.py
+++ b/backend/data/reviewImgs.py
@@ -1,6 +1,7 @@
#!/usr/bin/python3
import sys, re, os
+import sqlite3
import tkinter as tki
from tkinter import ttk
import PIL
@@ -17,6 +18,7 @@ if len(sys.argv) > 1:
imgDir = "imgsForReview/"
outDir = "imgsReviewed/"
+dbFile = "data.db"
IMG_DISPLAY_SZ = 400
MAX_IMGS_PER_ID = 3
PLACEHOLDER_IMG = Image.new("RGB", (IMG_DISPLAY_SZ, IMG_DISPLAY_SZ), (88, 28, 135))
@@ -31,6 +33,9 @@ imgList.sort(key=lambda s: int(s.split(" ")[0]))
if len(imgList) == 0:
print("No input images found", file=sys.stderr)
sys.exit(1)
+# Open db
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
class EolImgReviewer:
""" Provides the GUI for reviewing images """
@@ -120,8 +125,15 @@ class EolImgReviewer:
# Update title
firstImgIdx = self.imgListIdx - len(self.nextImgNames) + 1
lastImgIdx = self.imgListIdx
- self.root.title("Reviewing EOL ID {} (imgs {} to {} out of {})".format(
- self.nextEolId, firstImgIdx, lastImgIdx, len(self.imgList)))
+ row = dbCur.execute("SELECT alt_name, eol_id, pref_alt FROM names WHERE eol_id = ? and pref_alt = 1",
+ (self.nextEolId,)).fetchone()
+ if row != None:
+ commonName = row[0]
+ self.root.title("Reviewing EOL ID {}, aka \"{}\" (imgs {} to {} out of {})".format(
+ self.nextEolId, commonName, firstImgIdx, lastImgIdx, len(self.imgList)))
+ else:
+ self.root.title("Reviewing EOL ID {} (imgs {} to {} out of {})".format(
+ self.nextEolId, firstImgIdx, lastImgIdx, len(self.imgList)))
def accept(self, imgIdx):
""" React to a user selecting an image """
if imgIdx >= len(self.nextImgNames):
@@ -154,6 +166,7 @@ class EolImgReviewer:
self.labels[imgIdx].config(image=self.photoImgs[imgIdx])
self.rotations[imgIdx] = (self.rotations[imgIdx] + deg) % 360
def quit(self, e = None):
+ dbCon.close()
self.root.destroy()
def resizeForDisplay(self, img):
""" Returns a copy of an image, shrunk to fit the display (keeps aspect ratio), and with a background """