aboutsummaryrefslogtreecommitdiff
path: root/backend/tol_data/eol
diff options
context:
space:
mode:
author: Terry Truong <terry06890@gmail.com> 2023-01-29 11:30:47 +1100
committer: Terry Truong <terry06890@gmail.com> 2023-01-29 11:30:47 +1100
commit: 8781fdb2b8c530a6c1531ae9e82221eb062e34fb (patch)
tree: ffd824aa9b945d69b47f012617ee13d98764d078 /backend/tol_data/eol
parent: f5e87ae628bab0eef97b3e3e62f6d71cca9c99c0 (diff)
Adjust backend coding style
Add line spacing, section comments, and import consistency
Diffstat (limited to 'backend/tol_data/eol')
-rwxr-xr-x backend/tol_data/eol/download_imgs.py 28
-rwxr-xr-x backend/tol_data/eol/gen_images_list_db.py 13
-rwxr-xr-x backend/tol_data/eol/review_imgs.py 33
3 files changed, 57 insertions, 17 deletions
diff --git a/backend/tol_data/eol/download_imgs.py b/backend/tol_data/eol/download_imgs.py
index 8454a35..5757032 100755
--- a/backend/tol_data/eol/download_imgs.py
+++ b/backend/tol_data/eol/download_imgs.py
@@ -13,9 +13,16 @@ already-downloaded files, and continues after the one with
highest EOL ID.
"""
-import sys, re, os, random
+import argparse
+import sys
+import re
+import os
+import random
import sqlite3
-import urllib.parse, requests
+
+import requests
+import urllib.parse
+
import time
from threading import Thread
import signal
@@ -23,7 +30,7 @@ import signal
IMAGES_LIST_DB = 'images_list.db'
OUT_DIR = 'imgs_for_review'
DB_FILE = os.path.join('..', 'data.db')
-#
+
MAX_IMGS_PER_ID = 3
MAX_THREADS = 5
POST_DL_DELAY_MIN = 2 # Minimum delay in seconds to pause after download before starting another (for each thread)
@@ -43,7 +50,7 @@ def downloadImgs(eolIds, imagesListDb, outDir):
eolIdList = sorted(eolIds)
nextIdx = 0
print(f'Result: {len(eolIdList)} EOL IDs')
- #
+
print('Checking output directory')
if not os.path.exists(outDir):
os.mkdir(outDir)
@@ -57,7 +64,7 @@ def downloadImgs(eolIds, imagesListDb, outDir):
if nextIdx == len(eolIdList):
print('No IDs left. Exiting...')
return
- #
+
print('Starting download threads')
numThreads = 0
threadException: Exception | None = None # Used for ending main thread after a non-main thread exception
@@ -81,6 +88,7 @@ def downloadImgs(eolIds, imagesListDb, outDir):
print(f'Error while downloading to {outFile}: {str(e)}', file=sys.stderr)
threadException = e
numThreads -= 1
+
# Manage downloading
for idx in range(nextIdx, len(eolIdList)):
eolId = eolIdList[idx]
@@ -96,9 +104,11 @@ def downloadImgs(eolIds, imagesListDb, outDir):
if len(extension) <= 1:
print(f'WARNING: No filename extension found in URL {url}', file=sys.stderr)
continue
+
# Check image-quantity limit
if len(ownerSet) == MAX_IMGS_PER_ID:
break
+
# Check for skip conditions
if re.fullmatch(LICENSE_REGEX, license) is None:
continue
@@ -107,11 +117,13 @@ def downloadImgs(eolIds, imagesListDb, outDir):
if copyrightOwner in ownerSet:
continue
ownerSet.add(copyrightOwner)
+
# Determine output filename
outPath = os.path.join(outDir, f'{eolId} {contentId}{extension}')
if os.path.exists(outPath):
print(f'WARNING: {outPath} already exists. Skipping download.')
continue
+
# Check thread limit
while numThreads == MAX_THREADS:
time.sleep(1)
@@ -122,6 +134,7 @@ def downloadImgs(eolIds, imagesListDb, outDir):
time.sleep(1)
exitLoop = True
break
+
# Perform download
print(f'Downloading image to {outPath}')
numThreads += 1
@@ -129,6 +142,7 @@ def downloadImgs(eolIds, imagesListDb, outDir):
thread.start()
if exitLoop:
break
+
# Close images-list db
while numThreads > 0:
time.sleep(1)
@@ -143,10 +157,10 @@ def getEolIdsFromDb(dbFile) -> set[int]:
eolIds.add(id)
dbCon.close()
return eolIds
+
if __name__ == '__main__':
- import argparse
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.parse_args()
- #
+
eolIds = getEolIdsFromDb(DB_FILE)
downloadImgs(eolIds, IMAGES_LIST_DB, OUT_DIR)
diff --git a/backend/tol_data/eol/gen_images_list_db.py b/backend/tol_data/eol/gen_images_list_db.py
index ee57ac6..3e5bea1 100755
--- a/backend/tol_data/eol/gen_images_list_db.py
+++ b/backend/tol_data/eol/gen_images_list_db.py
@@ -4,8 +4,12 @@
Generates a sqlite db from a directory of CSV files holding EOL image data
"""
-import os, glob
-import csv, re, sqlite3
+import argparse
+import os
+import glob
+import csv
+import re
+import sqlite3
IMAGE_LISTS_GLOB = os.path.join('imagesList', '*.csv')
DB_FILE = 'images_list.db'
@@ -18,6 +22,7 @@ def genData(imageListsGlob: str, dbFile: str) -> None:
' (content_id INT PRIMARY KEY, page_id INT, source_url TEXT,' \
' copy_url TEXT, license TEXT, copyright_owner TEXT)')
dbCur.execute('CREATE INDEX images_pid_idx ON images(page_id)')
+
print('Reading CSV files')
for filename in glob.glob(imageListsGlob):
print(f'Processing {filename}')
@@ -27,13 +32,13 @@ def genData(imageListsGlob: str, dbFile: str) -> None:
continue
dbCur.execute('INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)',
(int(contentId), int(pageId), sourceUrl, copyUrl, license, owner))
+
print('Closing database')
dbCon.commit()
dbCon.close()
if __name__ == '__main__':
- import argparse
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.parse_args()
- #
+
genData(IMAGE_LISTS_GLOB, DB_FILE)
diff --git a/backend/tol_data/eol/review_imgs.py b/backend/tol_data/eol/review_imgs.py
index 9fb462c..145f338 100755
--- a/backend/tol_data/eol/review_imgs.py
+++ b/backend/tol_data/eol/review_imgs.py
@@ -7,8 +7,13 @@ choose an image to keep, or reject all. Also provides image rotation.
Chosen images are placed in another directory, and rejected ones are deleted.
"""
-import sys, re, os, time
+import argparse
+import sys
+import re
+import os
+import time
import sqlite3
+
import tkinter as tki
from tkinter import ttk
import PIL
@@ -17,7 +22,7 @@ from PIL import ImageTk, Image, ImageOps
IMG_DIR = 'imgs_for_review'
OUT_DIR = 'imgs'
EXTRA_INFO_DB = os.path.join('..', 'data.db')
-#
+
IMG_DISPLAY_SZ = 400
MAX_IMGS_PER_ID = 3
IMG_BG_COLOR = (88, 28, 135)
@@ -28,11 +33,13 @@ class EolImgReviewer:
def __init__(self, root, imgDir, imgList, extraInfoDb, outDir):
self.root = root
root.title('EOL Image Reviewer')
+
# Setup main frame
mainFrame = ttk.Frame(root, padding='5 5 5 5')
mainFrame.grid(column=0, row=0, sticky=(tki.N, tki.W, tki.E, tki.S))
root.columnconfigure(0, weight=1)
root.rowconfigure(0, weight=1)
+
# Set up images-to-be-reviewed frames
self.imgs = [PLACEHOLDER_IMG] * MAX_IMGS_PER_ID # Stored as fields for use in rotation
self.photoImgs = list(map(lambda img: ImageTk.PhotoImage(img), self.imgs)) # Image objects usable by tkinter
@@ -44,9 +51,11 @@ class EolImgReviewer:
label = ttk.Label(frame, image=self.photoImgs[i])
label.grid(column=0, row=0)
self.labels.append(label)
+
# Add padding
for child in mainFrame.winfo_children():
child.grid_configure(padx=5, pady=5)
+
# Add keyboard bindings
root.bind('<q>', self.quit)
root.bind('<Key-j>', lambda evt: self.accept(0))
@@ -59,6 +68,7 @@ class EolImgReviewer:
root.bind('<Key-A>', lambda evt: self.rotate(0, True))
root.bind('<Key-S>', lambda evt: self.rotate(1, True))
root.bind('<Key-D>', lambda evt: self.rotate(2, True))
+
# Initialise fields
self.imgDir = imgDir
self.imgList = imgList
@@ -67,13 +77,15 @@ class EolImgReviewer:
self.nextEolId = 0
self.nextImgNames: list[str] = []
self.rotations: list[int] = []
+
# For displaying extra info
self.extraInfoDbCon = sqlite3.connect(extraInfoDb)
self.extraInfoDbCur = self.extraInfoDbCon.cursor()
self.numReviewed = 0
self.startTime = time.time()
- #
+
self.getNextImgs()
+
def getNextImgs(self):
""" Updates display with new images to review, or ends program """
# Gather names of next images to review
@@ -95,6 +107,7 @@ class EolImgReviewer:
self.nextImgNames.append(imgName)
self.rotations.append(0)
self.imgListIdx += 1
+
# Update displayed images
idx = 0
while idx < MAX_IMGS_PER_ID:
@@ -113,16 +126,19 @@ class EolImgReviewer:
self.photoImgs[idx] = ImageTk.PhotoImage(self.imgs[idx])
self.labels[idx].config(image=self.photoImgs[idx])
idx += 1
+
# Restart if all image files non-recognisable
if not self.nextImgNames:
self.getNextImgs()
return
+
# Update title
firstImgIdx = self.imgListIdx - len(self.nextImgNames) + 1
lastImgIdx = self.imgListIdx
title = self.getExtraInfo(self.nextEolId)
title += f' (imgs {firstImgIdx} to {lastImgIdx} out of {len(self.imgList)})'
self.root.title(title)
+
def accept(self, imgIdx):
""" React to a user selecting an image """
if imgIdx >= len(self.nextImgNames):
@@ -142,12 +158,14 @@ class EolImgReviewer:
os.remove(inFile)
self.numReviewed += 1
self.getNextImgs()
+
def reject(self):
""" React to a user rejecting all images of a set """
for i in range(len(self.nextImgNames)):
os.remove(os.path.join(self.imgDir, self.nextImgNames[i]))
self.numReviewed += 1
self.getNextImgs()
+
def rotate(self, imgIdx, anticlockwise = False):
""" Respond to a user rotating an image """
deg = -90 if not anticlockwise else 90
@@ -155,6 +173,7 @@ class EolImgReviewer:
self.photoImgs[imgIdx] = ImageTk.PhotoImage(self.imgs[imgIdx])
self.labels[imgIdx].config(image=self.photoImgs[imgIdx])
self.rotations[imgIdx] = (self.rotations[imgIdx] + deg) % 360
+
def quit(self, e = None):
print(f'Number reviewed: {self.numReviewed}')
timeElapsed = time.time() - self.startTime
@@ -163,7 +182,7 @@ class EolImgReviewer:
print(f'Avg time per review: {timeElapsed/self.numReviewed:.2f} seconds')
self.extraInfoDbCon.close()
self.root.destroy()
- #
+
def resizeImgForDisplay(self, img):
""" Returns a copy of an image, shrunk to fit in it's frame (keeps aspect ratio), and with a background """
if max(img.width, img.height) > IMG_DISPLAY_SZ:
@@ -178,6 +197,7 @@ class EolImgReviewer:
int((IMG_DISPLAY_SZ - img.width) / 2),
int((IMG_DISPLAY_SZ - img.height) / 2)))
return bgImg
+
def getExtraInfo(self, eolId: int) -> str:
""" Used to display extra EOL ID info """
query = 'SELECT names.alt_name FROM' \
@@ -193,12 +213,14 @@ def reviewImgs(imgDir: str, outDir: str, extraInfoDb: str):
print('Checking output directory')
if not os.path.exists(outDir):
os.mkdir(outDir)
+
print('Getting input image list')
imgList = os.listdir(imgDir)
imgList.sort(key=lambda s: int(s.split(' ')[0]))
if not imgList:
print('No input images found')
sys.exit(0)
+
# Create GUI and defer control
print('Starting GUI')
root = tki.Tk()
@@ -206,8 +228,7 @@ def reviewImgs(imgDir: str, outDir: str, extraInfoDb: str):
root.mainloop()
if __name__ == '__main__':
- import argparse
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.parse_args()
- #
+
reviewImgs(IMG_DIR, OUT_DIR, EXTRA_INFO_DB)