diff options
| author | Terry Truong <terry06890@gmail.com> | 2023-01-29 11:30:47 +1100 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2023-01-29 11:30:47 +1100 |
| commit | 8781fdb2b8c530a6c1531ae9e82221eb062e34fb (patch) | |
| tree | ffd824aa9b945d69b47f012617ee13d98764d078 /backend/tol_data/eol | |
| parent | f5e87ae628bab0eef97b3e3e62f6d71cca9c99c0 (diff) | |
Adjust backend coding style
Add line spacing, section comments, and import consistency
Diffstat (limited to 'backend/tol_data/eol')
| Mode | File | Lines changed |
|---|---|---|
| -rwxr-xr-x | backend/tol_data/eol/download_imgs.py | 28 |
| -rwxr-xr-x | backend/tol_data/eol/gen_images_list_db.py | 13 |
| -rwxr-xr-x | backend/tol_data/eol/review_imgs.py | 33 |
3 files changed, 57 insertions, 17 deletions
diff --git a/backend/tol_data/eol/download_imgs.py b/backend/tol_data/eol/download_imgs.py index 8454a35..5757032 100755 --- a/backend/tol_data/eol/download_imgs.py +++ b/backend/tol_data/eol/download_imgs.py @@ -13,9 +13,16 @@ already-downloaded files, and continues after the one with highest EOL ID. """ -import sys, re, os, random +import argparse +import sys +import re +import os +import random import sqlite3 -import urllib.parse, requests + +import requests +import urllib.parse + import time from threading import Thread import signal @@ -23,7 +30,7 @@ import signal IMAGES_LIST_DB = 'images_list.db' OUT_DIR = 'imgs_for_review' DB_FILE = os.path.join('..', 'data.db') -# + MAX_IMGS_PER_ID = 3 MAX_THREADS = 5 POST_DL_DELAY_MIN = 2 # Minimum delay in seconds to pause after download before starting another (for each thread) @@ -43,7 +50,7 @@ def downloadImgs(eolIds, imagesListDb, outDir): eolIdList = sorted(eolIds) nextIdx = 0 print(f'Result: {len(eolIdList)} EOL IDs') - # + print('Checking output directory') if not os.path.exists(outDir): os.mkdir(outDir) @@ -57,7 +64,7 @@ def downloadImgs(eolIds, imagesListDb, outDir): if nextIdx == len(eolIdList): print('No IDs left. 
Exiting...') return - # + print('Starting download threads') numThreads = 0 threadException: Exception | None = None # Used for ending main thread after a non-main thread exception @@ -81,6 +88,7 @@ def downloadImgs(eolIds, imagesListDb, outDir): print(f'Error while downloading to {outFile}: {str(e)}', file=sys.stderr) threadException = e numThreads -= 1 + # Manage downloading for idx in range(nextIdx, len(eolIdList)): eolId = eolIdList[idx] @@ -96,9 +104,11 @@ def downloadImgs(eolIds, imagesListDb, outDir): if len(extension) <= 1: print(f'WARNING: No filename extension found in URL {url}', file=sys.stderr) continue + # Check image-quantity limit if len(ownerSet) == MAX_IMGS_PER_ID: break + # Check for skip conditions if re.fullmatch(LICENSE_REGEX, license) is None: continue @@ -107,11 +117,13 @@ def downloadImgs(eolIds, imagesListDb, outDir): if copyrightOwner in ownerSet: continue ownerSet.add(copyrightOwner) + # Determine output filename outPath = os.path.join(outDir, f'{eolId} {contentId}{extension}') if os.path.exists(outPath): print(f'WARNING: {outPath} already exists. 
Skipping download.') continue + # Check thread limit while numThreads == MAX_THREADS: time.sleep(1) @@ -122,6 +134,7 @@ def downloadImgs(eolIds, imagesListDb, outDir): time.sleep(1) exitLoop = True break + # Perform download print(f'Downloading image to {outPath}') numThreads += 1 @@ -129,6 +142,7 @@ def downloadImgs(eolIds, imagesListDb, outDir): thread.start() if exitLoop: break + # Close images-list db while numThreads > 0: time.sleep(1) @@ -143,10 +157,10 @@ def getEolIdsFromDb(dbFile) -> set[int]: eolIds.add(id) dbCon.close() return eolIds + if __name__ == '__main__': - import argparse parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.parse_args() - # + eolIds = getEolIdsFromDb(DB_FILE) downloadImgs(eolIds, IMAGES_LIST_DB, OUT_DIR) diff --git a/backend/tol_data/eol/gen_images_list_db.py b/backend/tol_data/eol/gen_images_list_db.py index ee57ac6..3e5bea1 100755 --- a/backend/tol_data/eol/gen_images_list_db.py +++ b/backend/tol_data/eol/gen_images_list_db.py @@ -4,8 +4,12 @@ Generates a sqlite db from a directory of CSV files holding EOL image data """ -import os, glob -import csv, re, sqlite3 +import argparse +import os +import glob +import csv +import re +import sqlite3 IMAGE_LISTS_GLOB = os.path.join('imagesList', '*.csv') DB_FILE = 'images_list.db' @@ -18,6 +22,7 @@ def genData(imageListsGlob: str, dbFile: str) -> None: ' (content_id INT PRIMARY KEY, page_id INT, source_url TEXT,' \ ' copy_url TEXT, license TEXT, copyright_owner TEXT)') dbCur.execute('CREATE INDEX images_pid_idx ON images(page_id)') + print('Reading CSV files') for filename in glob.glob(imageListsGlob): print(f'Processing {filename}') @@ -27,13 +32,13 @@ def genData(imageListsGlob: str, dbFile: str) -> None: continue dbCur.execute('INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)', (int(contentId), int(pageId), sourceUrl, copyUrl, license, owner)) + print('Closing database') dbCon.commit() dbCon.close() if __name__ == '__main__': - 
import argparse parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.parse_args() - # + genData(IMAGE_LISTS_GLOB, DB_FILE) diff --git a/backend/tol_data/eol/review_imgs.py b/backend/tol_data/eol/review_imgs.py index 9fb462c..145f338 100755 --- a/backend/tol_data/eol/review_imgs.py +++ b/backend/tol_data/eol/review_imgs.py @@ -7,8 +7,13 @@ choose an image to keep, or reject all. Also provides image rotation. Chosen images are placed in another directory, and rejected ones are deleted. """ -import sys, re, os, time +import argparse +import sys +import re +import os +import time import sqlite3 + import tkinter as tki from tkinter import ttk import PIL @@ -17,7 +22,7 @@ from PIL import ImageTk, Image, ImageOps IMG_DIR = 'imgs_for_review' OUT_DIR = 'imgs' EXTRA_INFO_DB = os.path.join('..', 'data.db') -# + IMG_DISPLAY_SZ = 400 MAX_IMGS_PER_ID = 3 IMG_BG_COLOR = (88, 28, 135) @@ -28,11 +33,13 @@ class EolImgReviewer: def __init__(self, root, imgDir, imgList, extraInfoDb, outDir): self.root = root root.title('EOL Image Reviewer') + # Setup main frame mainFrame = ttk.Frame(root, padding='5 5 5 5') mainFrame.grid(column=0, row=0, sticky=(tki.N, tki.W, tki.E, tki.S)) root.columnconfigure(0, weight=1) root.rowconfigure(0, weight=1) + # Set up images-to-be-reviewed frames self.imgs = [PLACEHOLDER_IMG] * MAX_IMGS_PER_ID # Stored as fields for use in rotation self.photoImgs = list(map(lambda img: ImageTk.PhotoImage(img), self.imgs)) # Image objects usable by tkinter @@ -44,9 +51,11 @@ class EolImgReviewer: label = ttk.Label(frame, image=self.photoImgs[i]) label.grid(column=0, row=0) self.labels.append(label) + # Add padding for child in mainFrame.winfo_children(): child.grid_configure(padx=5, pady=5) + # Add keyboard bindings root.bind('<q>', self.quit) root.bind('<Key-j>', lambda evt: self.accept(0)) @@ -59,6 +68,7 @@ class EolImgReviewer: root.bind('<Key-A>', lambda evt: self.rotate(0, True)) root.bind('<Key-S>', 
lambda evt: self.rotate(1, True)) root.bind('<Key-D>', lambda evt: self.rotate(2, True)) + # Initialise fields self.imgDir = imgDir self.imgList = imgList @@ -67,13 +77,15 @@ class EolImgReviewer: self.nextEolId = 0 self.nextImgNames: list[str] = [] self.rotations: list[int] = [] + # For displaying extra info self.extraInfoDbCon = sqlite3.connect(extraInfoDb) self.extraInfoDbCur = self.extraInfoDbCon.cursor() self.numReviewed = 0 self.startTime = time.time() - # + self.getNextImgs() + def getNextImgs(self): """ Updates display with new images to review, or ends program """ # Gather names of next images to review @@ -95,6 +107,7 @@ class EolImgReviewer: self.nextImgNames.append(imgName) self.rotations.append(0) self.imgListIdx += 1 + # Update displayed images idx = 0 while idx < MAX_IMGS_PER_ID: @@ -113,16 +126,19 @@ class EolImgReviewer: self.photoImgs[idx] = ImageTk.PhotoImage(self.imgs[idx]) self.labels[idx].config(image=self.photoImgs[idx]) idx += 1 + # Restart if all image files non-recognisable if not self.nextImgNames: self.getNextImgs() return + # Update title firstImgIdx = self.imgListIdx - len(self.nextImgNames) + 1 lastImgIdx = self.imgListIdx title = self.getExtraInfo(self.nextEolId) title += f' (imgs {firstImgIdx} to {lastImgIdx} out of {len(self.imgList)})' self.root.title(title) + def accept(self, imgIdx): """ React to a user selecting an image """ if imgIdx >= len(self.nextImgNames): @@ -142,12 +158,14 @@ class EolImgReviewer: os.remove(inFile) self.numReviewed += 1 self.getNextImgs() + def reject(self): """ React to a user rejecting all images of a set """ for i in range(len(self.nextImgNames)): os.remove(os.path.join(self.imgDir, self.nextImgNames[i])) self.numReviewed += 1 self.getNextImgs() + def rotate(self, imgIdx, anticlockwise = False): """ Respond to a user rotating an image """ deg = -90 if not anticlockwise else 90 @@ -155,6 +173,7 @@ class EolImgReviewer: self.photoImgs[imgIdx] = ImageTk.PhotoImage(self.imgs[imgIdx]) 
self.labels[imgIdx].config(image=self.photoImgs[imgIdx]) self.rotations[imgIdx] = (self.rotations[imgIdx] + deg) % 360 + def quit(self, e = None): print(f'Number reviewed: {self.numReviewed}') timeElapsed = time.time() - self.startTime @@ -163,7 +182,7 @@ class EolImgReviewer: print(f'Avg time per review: {timeElapsed/self.numReviewed:.2f} seconds') self.extraInfoDbCon.close() self.root.destroy() - # + def resizeImgForDisplay(self, img): """ Returns a copy of an image, shrunk to fit in it's frame (keeps aspect ratio), and with a background """ if max(img.width, img.height) > IMG_DISPLAY_SZ: @@ -178,6 +197,7 @@ class EolImgReviewer: int((IMG_DISPLAY_SZ - img.width) / 2), int((IMG_DISPLAY_SZ - img.height) / 2))) return bgImg + def getExtraInfo(self, eolId: int) -> str: """ Used to display extra EOL ID info """ query = 'SELECT names.alt_name FROM' \ @@ -193,12 +213,14 @@ def reviewImgs(imgDir: str, outDir: str, extraInfoDb: str): print('Checking output directory') if not os.path.exists(outDir): os.mkdir(outDir) + print('Getting input image list') imgList = os.listdir(imgDir) imgList.sort(key=lambda s: int(s.split(' ')[0])) if not imgList: print('No input images found') sys.exit(0) + # Create GUI and defer control print('Starting GUI') root = tki.Tk() @@ -206,8 +228,7 @@ def reviewImgs(imgDir: str, outDir: str, extraInfoDb: str): root.mainloop() if __name__ == '__main__': - import argparse parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.parse_args() - # + reviewImgs(IMG_DIR, OUT_DIR, EXTRA_INFO_DB) |
