From 515e02b9453f7740d7429ad7e11d913e32e5ffdb Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Fri, 3 Jun 2022 11:01:47 +1000 Subject: Rename some scripts to be eol-specific indicative --- backend/data/README.md | 8 +- backend/data/downloadEolImgs.py | 144 ++++++++++++++++++++++++ backend/data/downloadImgsForReview.py | 144 ------------------------ backend/data/genImgsForWeb.py | 2 +- backend/data/reviewEolImgs.py | 202 ++++++++++++++++++++++++++++++++++ backend/data/reviewImgs.py | 202 ---------------------------------- 6 files changed, 351 insertions(+), 351 deletions(-) create mode 100755 backend/data/downloadEolImgs.py delete mode 100755 backend/data/downloadImgsForReview.py create mode 100755 backend/data/reviewEolImgs.py delete mode 100755 backend/data/reviewImgs.py (limited to 'backend') diff --git a/backend/data/README.md b/backend/data/README.md index 8ee6e41..ece8efb 100644 --- a/backend/data/README.md +++ b/backend/data/README.md @@ -10,12 +10,12 @@ File Generation Process 2 Run genEolNameData.py, which adds 'names' and 'eol\_ids' tables to data.db, using data in eol/vernacularNames.csv and the 'nodes' table. 3 Image Data - 1 Run downloadImgsForReview.py to download EOL images into imgsForReview/. + 1 Run downloadEolImgs.py to download EOL images into eolImgsForReview/. It uses data in eol/imagesList.db, and the 'eol\_ids' table. - 2 Run reviewImgs.py to filter images in imgsForReview/ into EOL-id-unique - images in imgsReviewed/ (uses 'names' and 'eol\_ids' to display extra info). + 2 Run reviewEolImgs.py to filter images in eolImgsForReview/ into EOL-id-unique + images in eolImgsReviewed/ (uses 'names' and 'eol\_ids' to display extra info). 3 Run genImgsForWeb.py to create cropped/resized images in img/, using - images in imgsReviewed, and also to add an 'images' table to data.db. + images in eolImgsReviewed/, and also to add an 'images' table to data.db. 4 Run genLinkedImgs.py to add a 'linked_imgs' table to data.db, which uses 'nodes', 'edges', 'eol_ids', and 'images', to associate nodes without images to child images. diff --git a/backend/data/downloadEolImgs.py b/backend/data/downloadEolImgs.py new file mode 100755 index 0000000..8cf2ba2 --- /dev/null +++ b/backend/data/downloadEolImgs.py @@ -0,0 +1,144 @@ +#!/usr/bin/python3 + +import sys, re, os, random +import sqlite3 +import urllib.parse, requests +import time +from threading import Thread +import signal + +usageInfo = f"usage: {sys.argv[0]}\n" +usageInfo += "Downloads images from URLs specified in an image-list database, using\n" +usageInfo += "EOL IDs obtained from another database. Downloaded images get names of\n" +usageInfo += "the form 'eolId1 contentId1.ext1'\n" +usageInfo += "\n" +usageInfo += "SIGINT causes the program to finish ongoing downloads and exit.\n" +usageInfo += "The program can be re-run to continue downloading, and uses\n" +usageInfo += "existing downloaded files to decide where to continue from.\n" +if len(sys.argv) > 1: + print(usageInfo, file=sys.stderr) + sys.exit(1) + +imagesListDb = "eol/imagesList.db" +dbFile = "data.db" +outDir = "eolImgsForReview/" +LICENSE_REGEX = r"cc-by((-nc)?(-sa)?(-[234]\.[05])?)|cc-publicdomain|cc-0-1\.0|public domain" +POST_DL_DELAY_MIN = 2 # Minimum delay in seconds to pause after download before starting another (for each thread) +POST_DL_DELAY_MAX = 3 + +# Get eol-ids from data db +eolIds = set() +print("Reading in EOL IDs") +dbCon = sqlite3.connect(dbFile) +dbCur = dbCon.cursor() +for row in dbCur.execute("SELECT id FROM eol_ids"): + eolIds.add(row[0]) +dbCon.close() +# Get eol-ids from images db +imgDbCon = sqlite3.connect(imagesListDb) +imgCur = imgDbCon.cursor() +imgListIds = set() +for row in imgCur.execute("SELECT DISTINCT page_id FROM images"): + imgListIds.add(row[0]) +# Get eol-id intersection, and sort into list +eolIds = eolIds.intersection(imgListIds) +eolIds = sorted(eolIds) + +MAX_IMGS_PER_ID = 3 +MAX_THREADS = 5 +numThreads = 0 +threadException = None # Used for ending main thread after a non-main thread exception +def downloadImg(url, outFile): + global numThreads, threadException + try: + data = requests.get(url) + with open(outFile, 'wb') as file: + file.write(data.content) + time.sleep(random.random() * (POST_DL_DELAY_MAX - POST_DL_DELAY_MIN) + POST_DL_DELAY_MIN) + except Exception as e: + print(f"Error while downloading to {outFile}: {str(e)}", file=sys.stderr) + threadException = e + numThreads -= 1 +# Create output directory if not present +if not os.path.exists(outDir): + os.mkdir(outDir) +# Find next eol ID to download for +print("Finding next ID to download for") +nextIdx = 0 +fileList = os.listdir(outDir) +ids = [int(filename.split(" ")[0]) for filename in fileList] +if len(ids) > 0: + ids.sort() + nextIdx = eolIds.index(ids[-1]) + 1 +if nextIdx == len(eolIds): + print("No IDs left. Exiting...") + sys.exit(0) +# Detect SIGINT signals +interrupted = False +oldHandler = None +def onSigint(sig, frame): + global interrupted + interrupted = True + signal.signal(signal.SIGINT, oldHandler) +oldHandler = signal.signal(signal.SIGINT, onSigint) +# Manage downloading +for idx in range(nextIdx, len(eolIds)): + eolId = eolIds[idx] + # Get image urls + imgDataList = [] + ownerSet = set() # Used to get images from different owners, for variety + for row in imgCur.execute( + "SELECT content_id, page_id, copy_url, license, copyright_owner FROM images WHERE page_id = ?", (eolId,)): + license = row[3] + copyrightOwner = row[4] + if re.fullmatch(LICENSE_REGEX, license) == None: + continue + if len(copyrightOwner) > 100: # Ignore certain copyrightOwner fields that seem long and problematic + continue + if copyrightOwner not in ownerSet: + ownerSet.add(copyrightOwner) + imgDataList.append(row) + if len(ownerSet) == MAX_IMGS_PER_ID: + break + if len(imgDataList) == 0: + continue + # Determine output filenames + outFiles = [] + urls = [] + for row in imgDataList: + contentId = row[0] + url = row[2] + if url.startswith("data/"): + url = "https://content.eol.org/" + url + urlParts = urllib.parse.urlparse(url) + extension = os.path.splitext(urlParts.path)[1] + if len(extension) <= 1: + print(f"WARNING: No filename extension found in URL {url}", file=sys.stderr) + continue + outFiles.append(str(eolId) + " " + str(contentId) + extension) + urls.append(url) + # Start downloads + exitLoop = False + for i in range(len(outFiles)): + outPath = outDir + outFiles[i] + if not os.path.exists(outPath): + # Enforce thread limit + while numThreads == MAX_THREADS: + time.sleep(1) + # Wait for threads after an interrupt or thread-exception + if interrupted or threadException != None: + print("Waiting for existing threads to end") + while numThreads > 0: + time.sleep(1) + exitLoop = True + break + print("Downloading image to {outPath}") + # Perform download + numThreads += 1 + thread = Thread(target=downloadImg, args=(urls[i], outPath), daemon=True) + thread.start() + if exitLoop: + break +# Close images-list db +print("Finished downloading") +imgDbCon.close() diff --git a/backend/data/downloadImgsForReview.py b/backend/data/downloadImgsForReview.py deleted file mode 100755 index d1191d7..0000000 --- a/backend/data/downloadImgsForReview.py +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/bin/python3 - -import sys, re, os, random -import sqlite3 -import urllib.parse, requests -import time -from threading import Thread -import signal - -usageInfo = f"usage: {sys.argv[0]}\n" -usageInfo += "Downloads images from URLs specified in an image-list database, using\n" -usageInfo += "EOL IDs obtained from another database. Downloaded images get names of\n" -usageInfo += "the form 'eolId1 contentId1.ext1'\n" -usageInfo += "\n" -usageInfo += "SIGINT causes the program to finish ongoing downloads and exit.\n" -usageInfo += "The program can be re-run to continue downloading, and uses\n" -usageInfo += "existing downloaded files to decide where to continue from.\n" -if len(sys.argv) > 1: - print(usageInfo, file=sys.stderr) - sys.exit(1) - -imagesListDb = "eol/imagesList.db" -dbFile = "data.db" -outDir = "imgsForReview/" -LICENSE_REGEX = r"cc-by((-nc)?(-sa)?(-[234]\.[05])?)|cc-publicdomain|cc-0-1\.0|public domain" -POST_DL_DELAY_MIN = 2 # Minimum delay in seconds to pause after download before starting another (for each thread) -POST_DL_DELAY_MAX = 3 - -# Get eol-ids from data db -eolIds = set() -print("Reading in EOL IDs") -dbCon = sqlite3.connect(dbFile) -dbCur = dbCon.cursor() -for row in dbCur.execute("SELECT id FROM eol_ids"): - eolIds.add(row[0]) -dbCon.close() -# Get eol-ids from images db -imgDbCon = sqlite3.connect(imagesListDb) -imgCur = imgDbCon.cursor() -imgListIds = set() -for row in imgCur.execute("SELECT DISTINCT page_id FROM images"): - imgListIds.add(row[0]) -# Get eol-id intersection, and sort into list -eolIds = eolIds.intersection(imgListIds) -eolIds = sorted(eolIds) - -MAX_IMGS_PER_ID = 3 -MAX_THREADS = 5 -numThreads = 0 -threadException = None # Used for ending main thread after a non-main thread exception -def downloadImg(url, outFile): - global numThreads, threadException - try: - data = requests.get(url) - with open(outFile, 'wb') as file: - file.write(data.content) - time.sleep(random.random() * (POST_DL_DELAY_MAX - POST_DL_DELAY_MIN) + POST_DL_DELAY_MIN) - except Exception as e: - print(f"Error while downloading to {outFile}: {str(e)}", file=sys.stderr) - threadException = e - numThreads -= 1 -# Create output directory if not present -if not os.path.exists(outDir): - os.mkdir(outDir) -# Find next eol ID to download for -print("Finding next ID to download for") -nextIdx = 0 -fileList = os.listdir(outDir) -ids = [int(filename.split(" ")[0]) for filename in fileList] -if len(ids) > 0: - ids.sort() - nextIdx = eolIds.index(ids[-1]) + 1 -if nextIdx == len(eolIds): - print("No IDs left. Exiting...") - sys.exit(0) -# Detect SIGINT signals -interrupted = False -oldHandler = None -def onSigint(sig, frame): - global interrupted - interrupted = True - signal.signal(signal.SIGINT, oldHandler) -oldHandler = signal.signal(signal.SIGINT, onSigint) -# Manage downloading -for idx in range(nextIdx, len(eolIds)): - eolId = eolIds[idx] - # Get image urls - imgDataList = [] - ownerSet = set() # Used to get images from different owners, for variety - for row in imgCur.execute( - "SELECT content_id, page_id, copy_url, license, copyright_owner FROM images WHERE page_id = ?", (eolId,)): - license = row[3] - copyrightOwner = row[4] - if re.fullmatch(LICENSE_REGEX, license) == None: - continue - if len(copyrightOwner) > 100: # Ignore certain copyrightOwner fields that seem long and problematic - continue - if copyrightOwner not in ownerSet: - ownerSet.add(copyrightOwner) - imgDataList.append(row) - if len(ownerSet) == MAX_IMGS_PER_ID: - break - if len(imgDataList) == 0: - continue - # Determine output filenames - outFiles = [] - urls = [] - for row in imgDataList: - contentId = row[0] - url = row[2] - if url.startswith("data/"): - url = "https://content.eol.org/" + url - urlParts = urllib.parse.urlparse(url) - extension = os.path.splitext(urlParts.path)[1] - if len(extension) <= 1: - print(f"WARNING: No filename extension found in URL {url}", file=sys.stderr) - continue - outFiles.append(str(eolId) + " " + str(contentId) + extension) - urls.append(url) - # Start downloads - exitLoop = False - for i in range(len(outFiles)): - outPath = outDir + outFiles[i] - if not os.path.exists(outPath): - # Enforce thread limit - while numThreads == MAX_THREADS: - time.sleep(1) - # Wait for threads after an interrupt or thread-exception - if interrupted or threadException != None: - print("Waiting for existing threads to end") - while numThreads > 0: - time.sleep(1) - exitLoop = True - break - print("Downloading image to {outPath}") - # Perform download - numThreads += 1 - thread = Thread(target=downloadImg, args=(urls[i], outPath), daemon=True) - thread.start() - if exitLoop: - break -# Close images-list db -print("Finished downloading") -imgDbCon.close() diff --git a/backend/data/genImgsForWeb.py b/backend/data/genImgsForWeb.py index 9db260f..d1eef1b 100755 --- a/backend/data/genImgsForWeb.py +++ b/backend/data/genImgsForWeb.py @@ -16,7 +16,7 @@ if len(sys.argv) > 1: print(usageInfo, file=sys.stderr) sys.exit(1) -imgDir = "imgsReviewed/" +imgDir = "eolImgsReviewed/" outDir = "img/" imagesListDb = "eol/imagesList.db" dbFile = "data.db" diff --git a/backend/data/reviewEolImgs.py b/backend/data/reviewEolImgs.py new file mode 100755 index 0000000..08b8478 --- /dev/null +++ b/backend/data/reviewEolImgs.py @@ -0,0 +1,202 @@ +#!/usr/bin/python3 + +import sys, re, os, time +import sqlite3 +import tkinter as tki +from tkinter import ttk +import PIL +from PIL import ImageTk, Image, ImageOps + +usageInfo = f"usage: {sys.argv[0]}\n" +usageInfo += "Provides a GUI for reviewing images. Looks in a for-review directory for\n" +usageInfo += "images named 'eolId1 contentId1.ext1', and, for each EOL ID, enables the user to\n" +usageInfo += "choose an image to keep, or reject all. Also provides image rotation.\n" +usageInfo += "Chosen images are placed in another directory, and rejected ones are deleted.\n" +if len(sys.argv) > 1: + print(usageInfo, file=sys.stderr) + sys.exit(1) + +imgDir = "eolImgsForReview/" +outDir = "eolImgsReviewed/" +dbFile = "data.db" +IMG_DISPLAY_SZ = 400 +MAX_IMGS_PER_ID = 3 +PLACEHOLDER_IMG = Image.new("RGB", (IMG_DISPLAY_SZ, IMG_DISPLAY_SZ), (88, 28, 135)) + +# Create output directory if not present +if not os.path.exists(outDir): + os.mkdir(outDir) +# Get images for review +print("Reading input image list") +imgList = os.listdir(imgDir) +imgList.sort(key=lambda s: int(s.split(" ")[0])) +if len(imgList) == 0: + print("No input images found", file=sys.stderr) + sys.exit(1) +# Open db +dbCon = sqlite3.connect(dbFile) +dbCur = dbCon.cursor() + +class EolImgReviewer: + """ Provides the GUI for reviewing images """ + def __init__(self, root, imgList): + self.root = root + root.title("EOL Image Reviewer") + # Setup main frame + mainFrame = ttk.Frame(root, padding="5 5 5 5") + mainFrame.grid(column=0, row=0, sticky=(tki.N, tki.W, tki.E, tki.S)) + root.columnconfigure(0, weight=1) + root.rowconfigure(0, weight=1) + # Set up images-to-be-reviewed frames + self.imgs = [PLACEHOLDER_IMG] * MAX_IMGS_PER_ID # Stored as fields for use in rotation + self.photoImgs = list(map(lambda img: ImageTk.PhotoImage(img), self.imgs)) # Image objects usable by tkinter + # These need a persistent reference for some reason (doesn't display otherwise) + self.labels = [] + for i in range(MAX_IMGS_PER_ID): + frame = ttk.Frame(mainFrame, width=IMG_DISPLAY_SZ, height=IMG_DISPLAY_SZ) + frame.grid(column=i, row=0) + label = ttk.Label(frame, image=self.photoImgs[i]) + label.grid(column=0, row=0) + self.labels.append(label) + # Add padding + for child in mainFrame.winfo_children(): + child.grid_configure(padx=5, pady=5) + # Add bindings + root.bind("", self.quit) + root.bind("", lambda evt: self.accept(0)) + root.bind("", lambda evt: self.accept(1)) + root.bind("", lambda evt: self.accept(2)) + root.bind("", lambda evt: self.reject()) + root.bind("", lambda evt: self.rotate(0)) + root.bind("", lambda evt: self.rotate(1)) + root.bind("", lambda evt: self.rotate(2)) + root.bind("", lambda evt: self.rotate(0, True)) + root.bind("", lambda evt: self.rotate(1, True)) + root.bind("", lambda evt: self.rotate(2, True)) + # Initialise images to review + self.imgList = imgList + self.imgListIdx = 0 + self.nextEolId = 0 + self.nextImgNames = [] + self.rotations = [] + self.getNextImgs() + # For more info + self.numReviewed = 0 + self.startTime = time.time() + def getNextImgs(self): + """ Updates display with new images to review, or ends program """ + # Gather names of next images to review + for i in range(MAX_IMGS_PER_ID): + if self.imgListIdx == len(self.imgList): + if i == 0: + self.quit() + return + break + imgName = self.imgList[self.imgListIdx] + eolId = int(re.match(r"(\d+) (\d+)", imgName).group(1)) + if i == 0: + self.nextEolId = eolId + self.nextImgNames = [imgName] + self.rotations = [0] + else: + if self.nextEolId != eolId: + break + self.nextImgNames.append(imgName) + self.rotations.append(0) + self.imgListIdx += 1 + # Update displayed images + idx = 0 + while idx < MAX_IMGS_PER_ID: + if idx < len(self.nextImgNames): + try: + img = Image.open(imgDir + self.nextImgNames[idx]) + img = ImageOps.exif_transpose(img) + except PIL.UnidentifiedImageError: + os.remove(imgDir + self.nextImgNames[idx]) + del self.nextImgNames[idx] + del self.rotations[idx] + continue + self.imgs[idx] = self.resizeForDisplay(img) + else: + self.imgs[idx] = PLACEHOLDER_IMG + self.photoImgs[idx] = ImageTk.PhotoImage(self.imgs[idx]) + self.labels[idx].config(image=self.photoImgs[idx]) + idx += 1 + # Restart if all image files non-recognisable + if len(self.nextImgNames) == 0: + self.getNextImgs() + return + # Update title + firstImgIdx = self.imgListIdx - len(self.nextImgNames) + 1 + lastImgIdx = self.imgListIdx + query = "SELECT eol_ids.id, names.alt_name, names.pref_alt FROM" \ + " names INNER JOIN eol_ids ON eol_ids.name = names.name" \ + " WHERE id = ? and pref_alt = 1" + row = dbCur.execute(query, (self.nextEolId,)).fetchone() + if row != None: + commonName = row[1] + self.root.title( + f"Reviewing EOL ID {self.nextEolId}, aka \"{commonName}\"" \ + f"(imgs {firstImgIdx} to {lastImgIdx} out of {len(self.imgList)})") + else: + self.root.title( + f"Reviewing EOL ID {self.nextEolId} (imgs {firstImgIdx} to {lastImgIdx} out of {len(self.imgList)})") + def accept(self, imgIdx): + """ React to a user selecting an image """ + if imgIdx >= len(self.nextImgNames): + print("Invalid selection") + return + for i in range(len(self.nextImgNames)): + inFile = imgDir + self.nextImgNames[i] + if i == imgIdx: # Move accepted image, rotating if needed + outFile = outDir + self.nextImgNames[i] + img = Image.open(inFile) + img = ImageOps.exif_transpose(img) + if self.rotations[i] != 0: + img = img.rotate(self.rotations[i], expand=True) + img.save(outFile) + os.remove(inFile) + else: # Delete non-accepted image + os.remove(inFile) + self.numReviewed += 1 + self.getNextImgs() + def reject(self): + """ React to a user rejecting all images of a set """ + for i in range(len(self.nextImgNames)): + os.remove(imgDir + self.nextImgNames[i]) + self.numReviewed += 1 + self.getNextImgs() + def rotate(self, imgIdx, anticlockwise = False): + """ Respond to a user rotating an image """ + deg = -90 if not anticlockwise else 90 + self.imgs[imgIdx] = self.imgs[imgIdx].rotate(deg) + self.photoImgs[imgIdx] = ImageTk.PhotoImage(self.imgs[imgIdx]) + self.labels[imgIdx].config(image=self.photoImgs[imgIdx]) + self.rotations[imgIdx] = (self.rotations[imgIdx] + deg) % 360 + def quit(self, e = None): + print(f"Number reviewed: {self.numReviewed}") + timeElapsed = time.time() - self.startTime + print(f"Time elapsed: {timeElapsed:.2f} seconds") + if self.numReviewed > 0: + print(f"Avg time per review: {timeElapsed/self.numReviewed:.2f} seconds") + dbCon.close() + self.root.destroy() + def resizeForDisplay(self, img): + """ Returns a copy of an image, shrunk to fit the display (keeps aspect ratio), and with a background """ + if max(img.width, img.height) > IMG_DISPLAY_SZ: + if (img.width > img.height): + newHeight = int(img.height * IMG_DISPLAY_SZ/img.width) + img = img.resize((IMG_DISPLAY_SZ, newHeight)) + else: + newWidth = int(img.width * IMG_DISPLAY_SZ / img.height) + img = img.resize((newWidth, IMG_DISPLAY_SZ)) + bgImg = PLACEHOLDER_IMG.copy() + bgImg.paste(img, box=( + int((IMG_DISPLAY_SZ - img.width) / 2), + int((IMG_DISPLAY_SZ - img.height) / 2))) + return bgImg +# Create GUI and defer control +root = tki.Tk() +EolImgReviewer(root, imgList) +root.mainloop() + diff --git a/backend/data/reviewImgs.py b/backend/data/reviewImgs.py deleted file mode 100755 index 63e7dd5..0000000 --- a/backend/data/reviewImgs.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/python3 - -import sys, re, os, time -import sqlite3 -import tkinter as tki -from tkinter import ttk -import PIL -from PIL import ImageTk, Image, ImageOps - -usageInfo = f"usage: {sys.argv[0]}\n" -usageInfo += "Provides a GUI for reviewing images. Looks in a for-review directory for\n" -usageInfo += "images named 'eolId1 contentId1.ext1', and, for each EOL ID, enables the user to\n" -usageInfo += "choose an image to keep, or reject all. Also provides image rotation.\n" -usageInfo += "Chosen images are placed in another directory, and rejected ones are deleted.\n" -if len(sys.argv) > 1: - print(usageInfo, file=sys.stderr) - sys.exit(1) - -imgDir = "imgsForReview/" -outDir = "imgsReviewed/" -dbFile = "data.db" -IMG_DISPLAY_SZ = 400 -MAX_IMGS_PER_ID = 3 -PLACEHOLDER_IMG = Image.new("RGB", (IMG_DISPLAY_SZ, IMG_DISPLAY_SZ), (88, 28, 135)) - -# Create output directory if not present -if not os.path.exists(outDir): - os.mkdir(outDir) -# Get images for review -print("Reading input image list") -imgList = os.listdir(imgDir) -imgList.sort(key=lambda s: int(s.split(" ")[0])) -if len(imgList) == 0: - print("No input images found", file=sys.stderr) - sys.exit(1) -# Open db -dbCon = sqlite3.connect(dbFile) -dbCur = dbCon.cursor() - -class EolImgReviewer: - """ Provides the GUI for reviewing images """ - def __init__(self, root, imgList): - self.root = root - root.title("EOL Image Reviewer") - # Setup main frame - mainFrame = ttk.Frame(root, padding="5 5 5 5") - mainFrame.grid(column=0, row=0, sticky=(tki.N, tki.W, tki.E, tki.S)) - root.columnconfigure(0, weight=1) - root.rowconfigure(0, weight=1) - # Set up images-to-be-reviewed frames - self.imgs = [PLACEHOLDER_IMG] * MAX_IMGS_PER_ID # Stored as fields for use in rotation - self.photoImgs = list(map(lambda img: ImageTk.PhotoImage(img), self.imgs)) # Image objects usable by tkinter - # These need a persistent reference for some reason (doesn't display otherwise) - self.labels = [] - for i in range(MAX_IMGS_PER_ID): - frame = ttk.Frame(mainFrame, width=IMG_DISPLAY_SZ, height=IMG_DISPLAY_SZ) - frame.grid(column=i, row=0) - label = ttk.Label(frame, image=self.photoImgs[i]) - label.grid(column=0, row=0) - self.labels.append(label) - # Add padding - for child in mainFrame.winfo_children(): - child.grid_configure(padx=5, pady=5) - # Add bindings - root.bind("", self.quit) - root.bind("", lambda evt: self.accept(0)) - root.bind("", lambda evt: self.accept(1)) - root.bind("", lambda evt: self.accept(2)) - root.bind("", lambda evt: self.reject()) - root.bind("", lambda evt: self.rotate(0)) - root.bind("", lambda evt: self.rotate(1)) - root.bind("", lambda evt: self.rotate(2)) - root.bind("", lambda evt: self.rotate(0, True)) - root.bind("", lambda evt: self.rotate(1, True)) - root.bind("", lambda evt: self.rotate(2, True)) - # Initialise images to review - self.imgList = imgList - self.imgListIdx = 0 - self.nextEolId = 0 - self.nextImgNames = [] - self.rotations = [] - self.getNextImgs() - # For more info - self.numReviewed = 0 - self.startTime = time.time() - def getNextImgs(self): - """ Updates display with new images to review, or ends program """ - # Gather names of next images to review - for i in range(MAX_IMGS_PER_ID): - if self.imgListIdx == len(self.imgList): - if i == 0: - self.quit() - return - break - imgName = self.imgList[self.imgListIdx] - eolId = int(re.match(r"(\d+) (\d+)", imgName).group(1)) - if i == 0: - self.nextEolId = eolId - self.nextImgNames = [imgName] - self.rotations = [0] - else: - if self.nextEolId != eolId: - break - self.nextImgNames.append(imgName) - self.rotations.append(0) - self.imgListIdx += 1 - # Update displayed images - idx = 0 - while idx < MAX_IMGS_PER_ID: - if idx < len(self.nextImgNames): - try: - img = Image.open(imgDir + self.nextImgNames[idx]) - img = ImageOps.exif_transpose(img) - except PIL.UnidentifiedImageError: - os.remove(imgDir + self.nextImgNames[idx]) - del self.nextImgNames[idx] - del self.rotations[idx] - continue - self.imgs[idx] = self.resizeForDisplay(img) - else: - self.imgs[idx] = PLACEHOLDER_IMG - self.photoImgs[idx] = ImageTk.PhotoImage(self.imgs[idx]) - self.labels[idx].config(image=self.photoImgs[idx]) - idx += 1 - # Restart if all image files non-recognisable - if len(self.nextImgNames) == 0: - self.getNextImgs() - return - # Update title - firstImgIdx = self.imgListIdx - len(self.nextImgNames) + 1 - lastImgIdx = self.imgListIdx - query = "SELECT eol_ids.id, names.alt_name, names.pref_alt FROM" \ - " names INNER JOIN eol_ids ON eol_ids.name = names.name" \ - " WHERE id = ? and pref_alt = 1" - row = dbCur.execute(query, (self.nextEolId,)).fetchone() - if row != None: - commonName = row[1] - self.root.title( - f"Reviewing EOL ID {self.nextEolId}, aka \"{commonName}\"" \ - f"(imgs {firstImgIdx} to {lastImgIdx} out of {len(self.imgList)})") - else: - self.root.title( - f"Reviewing EOL ID {self.nextEolId} (imgs {firstImgIdx} to {lastImgIdx} out of {len(self.imgList)})") - def accept(self, imgIdx): - """ React to a user selecting an image """ - if imgIdx >= len(self.nextImgNames): - print("Invalid selection") - return - for i in range(len(self.nextImgNames)): - inFile = imgDir + self.nextImgNames[i] - if i == imgIdx: # Move accepted image, rotating if needed - outFile = outDir + self.nextImgNames[i] - img = Image.open(inFile) - img = ImageOps.exif_transpose(img) - if self.rotations[i] != 0: - img = img.rotate(self.rotations[i], expand=True) - img.save(outFile) - os.remove(inFile) - else: # Delete non-accepted image - os.remove(inFile) - self.numReviewed += 1 - self.getNextImgs() - def reject(self): - """ React to a user rejecting all images of a set """ - for i in range(len(self.nextImgNames)): - os.remove(imgDir + self.nextImgNames[i]) - self.numReviewed += 1 - self.getNextImgs() - def rotate(self, imgIdx, anticlockwise = False): - """ Respond to a user rotating an image """ - deg = -90 if not anticlockwise else 90 - self.imgs[imgIdx] = self.imgs[imgIdx].rotate(deg) - self.photoImgs[imgIdx] = ImageTk.PhotoImage(self.imgs[imgIdx]) - self.labels[imgIdx].config(image=self.photoImgs[imgIdx]) - self.rotations[imgIdx] = (self.rotations[imgIdx] + deg) % 360 - def quit(self, e = None): - print(f"Number reviewed: {self.numReviewed}") - timeElapsed = time.time() - self.startTime - print(f"Time elapsed: {timeElapsed:.2f} seconds") - if self.numReviewed > 0: - print(f"Avg time per review: {timeElapsed/self.numReviewed:.2f} seconds") - dbCon.close() - self.root.destroy() - def resizeForDisplay(self, img): - """ Returns a copy of an image, shrunk to fit the display (keeps aspect ratio), and with a background """ - if max(img.width, img.height) > IMG_DISPLAY_SZ: - if (img.width > img.height): - newHeight = int(img.height * IMG_DISPLAY_SZ/img.width) - img = img.resize((IMG_DISPLAY_SZ, newHeight)) - else: - newWidth = int(img.width * IMG_DISPLAY_SZ / img.height) - img = img.resize((newWidth, IMG_DISPLAY_SZ)) - bgImg = PLACEHOLDER_IMG.copy() - bgImg.paste(img, box=( - int((IMG_DISPLAY_SZ - img.width) / 2), - int((IMG_DISPLAY_SZ - img.height) / 2))) - return bgImg -# Create GUI and defer control -root = tki.Tk() -EolImgReviewer(root, imgList) -root.mainloop() - -- cgit v1.2.3