diff options
Diffstat (limited to 'backend/tol_data/review_imgs_to_gen.py')
| -rwxr-xr-x | backend/tol_data/review_imgs_to_gen.py | 241 |
1 files changed, 241 insertions, 0 deletions
diff --git a/backend/tol_data/review_imgs_to_gen.py b/backend/tol_data/review_imgs_to_gen.py new file mode 100755 index 0000000..2283ed7 --- /dev/null +++ b/backend/tol_data/review_imgs_to_gen.py @@ -0,0 +1,241 @@ +#!/usr/bin/python3 + +""" +Provides a GUI that displays, for each node in the database, associated +images from EOL and Wikipedia, and allows choosing which to use. Writes +choice data to a text file with lines of the form 'otolId1 imgPath1', or +'otolId1', where no path indicates a choice of no image. + +The program can be closed, and run again to continue from the last choice. +The program looks for an existing output file to determine what choices +have already been made. +""" + +import os, time +import sqlite3 +import tkinter as tki +from tkinter import ttk +import PIL +from PIL import ImageTk, Image, ImageOps + +EOL_IMG_DIR = os.path.join('eol', 'imgs') +ENWIKI_IMG_DIR = os.path.join('enwiki', 'imgs') +DB_FILE = 'data.db' +OUT_FILE = 'img_list.txt' +# +IMG_DISPLAY_SZ = 400 +PLACEHOLDER_IMG = Image.new('RGB', (IMG_DISPLAY_SZ, IMG_DISPLAY_SZ), (88, 28, 135)) +REVIEW = 'only pairs' # Can be: 'all', 'only pairs', 'none' + +class ImgReviewer: + """ Provides the GUI for reviewing images """ + def __init__(self, root, nodeToImgs, eolImgDir, enwikiImgDir, outFile, dbCon, review): + self.root = root + root.title('Image Reviewer') + # Setup main frame + mainFrame = ttk.Frame(root, padding='5 5 5 5') + mainFrame.grid(column=0, row=0, sticky=(tki.N, tki.W, tki.E, tki.S)) + root.columnconfigure(0, weight=1) + root.rowconfigure(0, weight=1) + # Set up images-to-be-reviewed frames + self.eolImg = ImageTk.PhotoImage(PLACEHOLDER_IMG) + self.enwikiImg = ImageTk.PhotoImage(PLACEHOLDER_IMG) + self.labels: list[ttk.Label] = [] + for i in (0, 1): + frame = ttk.Frame(mainFrame, width=IMG_DISPLAY_SZ, height=IMG_DISPLAY_SZ) + frame.grid(column=i, row=0) + label = ttk.Label(frame, image=self.eolImg if i == 0 else self.enwikiImg) + label.grid(column=0, row=0) + self.labels.append(label) + # Add padding + for child in mainFrame.winfo_children(): + child.grid_configure(padx=5, pady=5) + # Add keyboard bindings + root.bind('<q>', self.quit) + root.bind('<Key-j>', lambda evt: self.accept(0)) + root.bind('<Key-k>', lambda evt: self.accept(1)) + root.bind('<Key-l>', lambda evt: self.reject()) + # Set fields + self.nodeImgsList = list(nodeToImgs.items()) + self.listIdx = -1 + self.eolImgDir = eolImgDir + self.enwikiImgDir = enwikiImgDir + self.outFile = outFile + self.review = review + self.dbCon = dbCon + self.dbCur = dbCon.cursor() + self.otolId = None + self.eolImgPath = None + self.enwikiImgPath = None + self.numReviewed = 0 + self.startTime = time.time() + # Initialise images to review + self.getNextImgs() + def getNextImgs(self): + """ Updates display with new images to review, or ends program """ + # Get next image paths + while True: + self.listIdx += 1 + if self.listIdx == len(self.nodeImgsList): + print('No more images to review. Exiting program.') + self.quit() + return + self.otolId, imgPaths = self.nodeImgsList[self.listIdx] + # Potentially skip user choice + if len(imgPaths) == 1 and (self.review == 'only pairs' or self.review == 'none'): + with open(self.outFile, 'a') as file: + file.write(f'{self.otolId} {imgPaths[0]}\n') + continue + elif self.review == 'none': + with open(self.outFile, 'a') as file: + file.write(f'{self.otolId} {imgPaths[-1]}\n') # Prefer enwiki image + continue + break + # Update displayed images + self.eolImgPath = self.enwikiImgPath = None + imageOpenError = False + for imgPath in imgPaths: + img: Image + try: + img = Image.open(imgPath) + img = ImageOps.exif_transpose(img) + except PIL.UnidentifiedImageError: + print(f'UnidentifiedImageError for {imgPath}') + imageOpenError = True + continue + if imgPath.startswith(self.eolImgDir): + self.eolImgPath = imgPath + self.eolImg = ImageTk.PhotoImage(self.resizeImgForDisplay(img)) + elif imgPath.startswith(self.enwikiImgDir): + self.enwikiImgPath = imgPath + self.enwikiImg = ImageTk.PhotoImage(self.resizeImgForDisplay(img)) + else: + print(f'Unexpected image path {imgPath}') + self.quit() + return + # Re-iterate if all image paths invalid + if self.eolImgPath is None and self.enwikiImgPath is None: + if imageOpenError: + self.reject() + self.getNextImgs() + return + # Add placeholder images + if self.eolImgPath is None: + self.eolImg = ImageTk.PhotoImage(self.resizeImgForDisplay(PLACEHOLDER_IMG)) + elif self.enwikiImgPath is None: + self.enwikiImg = ImageTk.PhotoImage(self.resizeImgForDisplay(PLACEHOLDER_IMG)) + # Update image-frames + self.labels[0].config(image=self.eolImg) + self.labels[1].config(image=self.enwikiImg) + # Update title + title = f'Images for otol ID {self.otolId}' + query = 'SELECT names.alt_name FROM' \ + ' nodes INNER JOIN names ON nodes.name = names.name' \ + ' WHERE nodes.id = ? and pref_alt = 1' + row = self.dbCur.execute(query, (self.otolId,)).fetchone() + if row is not None: + title += f', aka {row[0]}' + title += f' ({self.listIdx + 1} out of {len(self.nodeImgsList)})' + self.root.title(title) + def accept(self, imgIdx): + """ React to a user selecting an image """ + imgPath = self.eolImgPath if imgIdx == 0 else self.enwikiImgPath + if imgPath is None: + print('Invalid selection') + return + with open(self.outFile, 'a') as file: + file.write(f'{self.otolId} {imgPath}\n') + self.numReviewed += 1 + self.getNextImgs() + def reject(self): + """"" React to a user rejecting all images of a set """ + with open(self.outFile, 'a') as file: + file.write(f'{self.otolId}\n') + self.numReviewed += 1 + self.getNextImgs() + def quit(self, e = None): + print(f'Number reviewed: {self.numReviewed}') + timeElapsed = time.time() - self.startTime + print(f'Time elapsed: {timeElapsed:.2f} seconds') + if self.numReviewed > 0: + print(f'Avg time per review: {timeElapsed/self.numReviewed:.2f} seconds') + self.dbCon.close() + self.root.destroy() + def resizeImgForDisplay(self, img): + """ Returns a copy of an image, shrunk to fit it's frame (keeps aspect ratio), and with a background """ + if max(img.width, img.height) > IMG_DISPLAY_SZ: + if (img.width > img.height): + newHeight = int(img.height * IMG_DISPLAY_SZ/img.width) + img = img.resize((IMG_DISPLAY_SZ, newHeight)) + else: + newWidth = int(img.width * IMG_DISPLAY_SZ / img.height) + img = img.resize((newWidth, IMG_DISPLAY_SZ)) + bgImg = PLACEHOLDER_IMG.copy() + bgImg.paste(img, box=( + int((IMG_DISPLAY_SZ - img.width) / 2), + int((IMG_DISPLAY_SZ - img.height) / 2))) + return bgImg + +def reviewImgs(eolImgDir: str, enwikiImgDir: str, dbFile: str, outFile: str, review: str) -> None: + print('Opening database') + dbCon = sqlite3.connect(dbFile) + dbCur = dbCon.cursor() + # + nodeToImgs: dict[str, list[str]] = {} # Maps otol-ids to arrays of image paths + print('Iterating through images from EOL') + if os.path.exists(eolImgDir): + for filename in os.listdir(eolImgDir): + # Get associated EOL ID + eolId, _, _ = filename.partition(' ') + query = 'SELECT nodes.id FROM nodes INNER JOIN eol_ids ON nodes.name = eol_ids.name WHERE eol_ids.id = ?' + # Get associated node IDs + found = False + for (otolId,) in dbCur.execute(query, (int(eolId),)): + if otolId not in nodeToImgs: + nodeToImgs[otolId] = [] + nodeToImgs[otolId].append(os.path.join(eolImgDir, filename)) + found = True + if not found: + print(f'WARNING: No node found for {os.path.join(eolImgDir, filename)}') + print(f'Result: {len(nodeToImgs)} nodes with images') + print('Iterating through images from Wikipedia') + if os.path.exists(enwikiImgDir): + for filename in os.listdir(enwikiImgDir): + # Get associated page ID + wikiId, _, _ = filename.partition('.') + # Get associated node IDs + query = 'SELECT nodes.id FROM nodes INNER JOIN wiki_ids ON nodes.name = wiki_ids.name WHERE wiki_ids.id = ?' + found = False + for (otolId,) in dbCur.execute(query, (int(wikiId),)): + if otolId not in nodeToImgs: + nodeToImgs[otolId] = [] + nodeToImgs[otolId].append(os.path.join(enwikiImgDir, filename)) + found = True + if not found: + print(f'WARNING: No node found for {os.path.join(enwikiImgDir, filename)}') + print(f'Result: {len(nodeToImgs)} nodes with images') + # + print('Filtering out already-made image choices') + oldSz = len(nodeToImgs) + if os.path.exists(outFile): + with open(outFile) as file: + for line in file: + line = line.rstrip() + if ' ' in line: + line = line[:line.find(' ')] + del nodeToImgs[line] + print(f'Filtered out {oldSz - len(nodeToImgs)} entries') + # + # Create GUI and defer control + print('Starting GUI') + root = tki.Tk() + ImgReviewer(root, nodeToImgs, eolImgDir, enwikiImgDir, outFile, dbCon, review) + root.mainloop() + dbCon.close() + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) + parser.parse_args() + # + reviewImgs(EOL_IMG_DIR, ENWIKI_IMG_DIR, DB_FILE, OUT_FILE, REVIEW) |
