Adjust backend coding style

Increase line spacing, add section comments, etc.
author: Terry Truong <terry06890@gmail.com> 2023-01-21 12:21:03 +1100
committer: Terry Truong <terry06890@gmail.com> 2023-01-21 12:32:01 +1100
commit: 0a9b2c2e5eca8a04e37fbdd423379882863237c2 (patch)
tree: 1812bdb6bb13e4f76fdd7ef04075b291f775c213 /backend/hist_data/enwiki/download_img_license_info.py
parent: 8321e2f92dbc073b8f1de87895d6620a2021b22e (diff)
1 files changed, 20 insertions, 9 deletions
diff --git a/backend/hist_data/enwiki/download_img_license_info.py b/backend/hist_data/enwiki/download_img_license_info.py
index 43f2c43..6fd710c 100755
--- a/backend/hist_data/enwiki/download_img_license_info.py
+++ b/backend/hist_data/enwiki/download_img_license_info.py
@@ -10,12 +10,16 @@ at already-processed names to decide what to skip.
 """
 
 import argparse
-import re, time, signal
-import sqlite3, urllib.parse, html
+import re
+import time
+import signal
+import sqlite3
+import urllib.parse
+import html
 import requests
 
 IMG_DB = 'img_data.db'
-#
+
 API_URL = 'https://en.wikipedia.org/w/api.php'
 USER_AGENT = 'terryt.dev (terry06890@gmail.com)'
 BATCH_SZ = 50 # Max 50
@@ -26,17 +30,18 @@ def downloadInfo(imgDb: str) -> None:
 	print('Opening database')
 	dbCon = sqlite3.connect(imgDb)
 	dbCur = dbCon.cursor()
+
 	print('Checking for table')
 	if dbCur.execute('SELECT name FROM sqlite_master WHERE type="table" AND name="imgs"').fetchone() is None:
 		dbCur.execute('CREATE TABLE imgs (id INT PRIMARY KEY, name TEXT UNIQUE, ' \
 			'license TEXT, artist TEXT, credit TEXT, restrictions TEXT, url TEXT)')
-	#
+
 	print('Reading image names')
 	imgNames: set[str] = set()
 	for (imgName,) in dbCur.execute('SELECT DISTINCT img_name FROM page_imgs WHERE img_name NOT NULL'):
 		imgNames.add(imgName)
 	print(f'Found {len(imgNames)}')
-	#
+
 	print('Checking for already-processed images')
 	nextImgId = 1
 	oldSz = len(imgNames)
@@ -45,7 +50,7 @@ def downloadInfo(imgDb: str) -> None:
 		if imgId >= nextImgId:
 			nextImgId = imgId + 1
 	print(f'Found {oldSz - len(imgNames)}')
-	#
+
 	# Set SIGINT handler
 	interrupted = False
 	oldHandler = None
@@ -54,7 +59,7 @@ def downloadInfo(imgDb: str) -> None:
 		interrupted = True
 		signal.signal(signal.SIGINT, oldHandler)
 	oldHandler = signal.signal(signal.SIGINT, onSigint)
-	#
+
 	print('Iterating through image names')
 	imgNameList = list(imgNames)
 	iterNum = 0
@@ -65,9 +70,11 @@ def downloadInfo(imgDb: str) -> None:
 		if interrupted:
 			print(f'Exiting loop at iteration {iterNum}')
 			break
+
 		# Get batch
 		imgBatch = imgNameList[i:i+BATCH_SZ]
 		imgBatch = ['File:' + x for x in imgBatch]
+
 		# Make request
 		headers = {
 			'user-agent': USER_AGENT,
@@ -90,6 +97,7 @@ def downloadInfo(imgDb: str) -> None:
 			print(f'ERROR: Exception while downloading info: {e}')
 			print('\tImage batch: ' + '|'.join(imgBatch))
 			continue
+
 		# Parse response-object
 		if 'query' not in responseObj or 'pages' not in responseObj['query']:
 			print('WARNING: Response object doesn\'t have page data')
@@ -120,6 +128,7 @@ def downloadInfo(imgDb: str) -> None:
 			if title not in imgNames:
 				print(f'WARNING: Got title "{title}" not in image-name list')
 				continue
+
 			if 'imageinfo' not in page:
 				print(f'WARNING: No imageinfo section for page "{title}"')
 				continue
@@ -129,6 +138,7 @@ def downloadInfo(imgDb: str) -> None:
 			artist: str | None = metadata['Artist']['value'] if 'Artist' in metadata else None
 			credit: str | None = metadata['Credit']['value'] if 'Credit' in metadata else None
 			restrictions: str | None = metadata['Restrictions']['value'] if 'Restrictions' in metadata else None
+
 			# Remove markup
 			if artist is not None:
 				artist = TAG_REGEX.sub(' ', artist).strip()
@@ -140,11 +150,12 @@ def downloadInfo(imgDb: str) -> None:
 				credit = WHITESPACE_REGEX.sub(' ', credit)
 				credit = html.unescape(credit)
 				credit = urllib.parse.unquote(credit)
+
 			# Add to db
 			dbCur.execute('INSERT INTO imgs VALUES (?, ?, ?, ?, ?, ?, ?)',
 				(nextImgId, title, license, artist, credit, restrictions, url))
 			nextImgId += 1
-	#
+
 	print('Closing database')
 	dbCon.commit()
 	dbCon.close()
@@ -152,5 +163,5 @@ def downloadInfo(imgDb: str) -> None:
 if __name__ == '__main__':
 	parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
 	parser.parse_args()
-	#
+
 	downloadInfo(IMG_DB)
author	Terry Truong <terry06890@gmail.com>	2023-01-21 12:21:03 +1100
committer	Terry Truong <terry06890@gmail.com>	2023-01-21 12:32:01 +1100
commit	0a9b2c2e5eca8a04e37fbdd423379882863237c2 (patch)
tree	1812bdb6bb13e4f76fdd7ef04075b291f775c213 /backend/hist_data/enwiki/download_img_license_info.py
parent	8321e2f92dbc073b8f1de87895d6620a2021b22e (diff)