diff options
Diffstat (limited to 'backend/data')
| -rw-r--r-- | backend/data/README.md | 2 |
| -rwxr-xr-x | backend/data/genEolNameData.py | 22 |
| -rwxr-xr-x | backend/data/reviewImgs.py | 17 |
3 files changed, 31 insertions, 10 deletions
diff --git a/backend/data/README.md b/backend/data/README.md index 3cbeb03..88053b5 100644 --- a/backend/data/README.md +++ b/backend/data/README.md @@ -8,6 +8,6 @@ File Generation Process 4 Use downloadImgsForReview.py to download EOL images into imgsForReview/. It uses data in eol/imagesList.db, and the 'nodes' table. 5 Use reviewImgs.py to filter images in imgsForReview/ into EOL-id-unique - images in imgsReviewed/. + images in imgsReviewed/ (uses 'names' to display common names). 6 Use genImgsForWeb.py to create cropped/resized images in img/, using images in imgsReviewed, and also to add an 'images' table to data.db. diff --git a/backend/data/genEolNameData.py b/backend/data/genEolNameData.py index ce887b3..5070fd7 100755 --- a/backend/data/genEolNameData.py +++ b/backend/data/genEolNameData.py @@ -23,7 +23,8 @@ dbFile = "data.db" nameToPids = {} pidToNames = {} canonicalNameToPids = {} -def updateMaps(name, pid, canonical): +pidToPreferred = {} +def updateMaps(name, pid, canonical, preferredAlt): if name not in nameToPids: nameToPids[name] = {pid} else: @@ -37,6 +38,8 @@ def updateMaps(name, pid, canonical): pidToNames[pid] = {name} else: pidToNames[pid].add(name) + if preferredAlt: + pidToPreferred[pid] = name with open(vnamesFile, newline="") as csvfile: reader = csv.reader(csvfile) lineNum = 0 @@ -47,14 +50,15 @@ with open(vnamesFile, newline="") as csvfile: pid = int(row[0]) name1 = re.sub(r"<[^>]+>", "", row[1].lower()) # Remove tags name2 = row[2].lower() + preferred = row[6] == "preferred" and row[3] == "eng" # Add to maps - updateMaps(name1, pid, True) - updateMaps(name2, pid, False) + updateMaps(name1, pid, True, False) + updateMaps(name2, pid, False, preferred) # Open db connection dbCon = sqlite3.connect(dbFile) -cur = dbCon.cursor() +dbCur = dbCon.cursor() # Create 'names' table -cur.execute("CREATE TABLE names(name TEXT, alt_name TEXT, eol_id INT, PRIMARY KEY(name, alt_name))") +dbCur.execute("CREATE TABLE names(name TEXT, alt_name TEXT, eol_id 
INT, pref_alt INT, PRIMARY KEY(name, alt_name))") # Iterate through 'nodes' table, resolving to canonical-names usedPids = set() unresolvedNodeNames = set() @@ -75,10 +79,12 @@ for row in cur2.execute("SELECT name FROM nodes"): if pidToUse > 0: usedPids.add(pidToUse) altNames = {name} + preferredName = pidToPreferred[pidToUse] if (pidToUse in pidToPreferred) else None for n in pidToNames[pidToUse]: altNames.add(n) for n in altNames: - cur.execute("INSERT INTO names VALUES (?, ?, ?)", (name, n, pidToUse)) + isPreferred = 1 if (n == preferredName) else 0 + dbCur.execute("INSERT INTO names VALUES (?, ?, ?, ?)", (name, n, pidToUse, isPreferred)) elif name in nameToPids: unresolvedNodeNames.add(name) # Iterate through unresolved nodes, resolving to vernacular-names @@ -96,9 +102,11 @@ for name in unresolvedNodeNames: if pidToUse > 0: usedPids.add(pidToUse) altNames = {name} + preferredName = pidToPreferred[pidToUse] if (pidToUse in pidToPreferred) else None for n in pidToNames[pidToUse]: altNames.add(n) for n in altNames: - cur.execute("INSERT INTO names VALUES (?, ?, ?)", (name, n, pidToUse)) + isPreferred = 1 if (n == preferredName) else 0 + dbCur.execute("INSERT INTO names VALUES (?, ?, ?, ?)", (name, n, pidToUse, isPreferred)) dbCon.commit() dbCon.close() diff --git a/backend/data/reviewImgs.py b/backend/data/reviewImgs.py index 3df7809..5dcd52e 100755 --- a/backend/data/reviewImgs.py +++ b/backend/data/reviewImgs.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 import sys, re, os +import sqlite3 import tkinter as tki from tkinter import ttk import PIL @@ -17,6 +18,7 @@ if len(sys.argv) > 1: imgDir = "imgsForReview/" outDir = "imgsReviewed/" +dbFile = "data.db" IMG_DISPLAY_SZ = 400 MAX_IMGS_PER_ID = 3 PLACEHOLDER_IMG = Image.new("RGB", (IMG_DISPLAY_SZ, IMG_DISPLAY_SZ), (88, 28, 135)) @@ -31,6 +33,9 @@ imgList.sort(key=lambda s: int(s.split(" ")[0])) if len(imgList) == 0: print("No input images found", file=sys.stderr) sys.exit(1) +# Open db +dbCon = sqlite3.connect(dbFile) 
+dbCur = dbCon.cursor() class EolImgReviewer: """ Provides the GUI for reviewing images """ @@ -120,8 +125,15 @@ class EolImgReviewer: # Update title firstImgIdx = self.imgListIdx - len(self.nextImgNames) + 1 lastImgIdx = self.imgListIdx - self.root.title("Reviewing EOL ID {} (imgs {} to {} out of {})".format( - self.nextEolId, firstImgIdx, lastImgIdx, len(self.imgList))) + row = dbCur.execute("SELECT alt_name, eol_id, pref_alt FROM names WHERE eol_id = ? and pref_alt = 1", + (self.nextEolId,)).fetchone() + if row != None: + commonName = row[0] + self.root.title("Reviewing EOL ID {}, aka \"{}\" (imgs {} to {} out of {})".format( + self.nextEolId, commonName, firstImgIdx, lastImgIdx, len(self.imgList))) + else: + self.root.title("Reviewing EOL ID {} (imgs {} to {} out of {})".format( + self.nextEolId, firstImgIdx, lastImgIdx, len(self.imgList))) def accept(self, imgIdx): """ React to a user selecting an image """ if imgIdx >= len(self.nextImgNames): @@ -154,6 +166,7 @@ class EolImgReviewer: self.labels[imgIdx].config(image=self.photoImgs[imgIdx]) self.rotations[imgIdx] = (self.rotations[imgIdx] + deg) % 360 def quit(self, e = None): + dbCon.close() self.root.destroy() def resizeForDisplay(self, img): """ Returns a copy of an image, shrunk to fit the display (keeps aspect ratio), and with a background """ |
