From 56369bccd977ac726bef70895883e79da4e1edd8 Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Mon, 2 Jan 2023 14:51:53 +1100 Subject: Adjust wikidata event specifiers Do minor refactors: - Swap fmt=1 and fmt=2 in 'events' table - Make documentation consistently use BC and AD - import argparse at start of scripts --- backend/hist_data/enwiki/download_imgs.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'backend/hist_data/enwiki/download_imgs.py') diff --git a/backend/hist_data/enwiki/download_imgs.py b/backend/hist_data/enwiki/download_imgs.py index bbd2cda..7dd0771 100755 --- a/backend/hist_data/enwiki/download_imgs.py +++ b/backend/hist_data/enwiki/download_imgs.py @@ -9,10 +9,10 @@ The program can be re-run to continue downloading, and looks in the output directory to decide what to skip. """ -import re, os +import argparse +import re, os, time, signal import sqlite3 import urllib.parse, requests -import time, signal IMG_DB = 'img_data.db' # About 130k image names OUT_DIR = 'imgs' @@ -22,7 +22,7 @@ USER_AGENT = 'terryt.dev (terry06890@gmail.com)' TIMEOUT = 1 # https://en.wikipedia.org/wiki/Wikipedia:Database_download says to 'throttle to 1 cache miss per sec' # It's unclear how to properly check for cache misses, so we just aim for 1 per sec -BACKOFF = False # If True, double the timeout each time a download error occurs (otherwise just exit) +EXP_BACKOFF = False # If True, double the timeout each time a download error occurs (otherwise just exit) def downloadImgs(imgDb: str, outDir: str, timeout: int) -> None: if not os.path.exists(outDir): @@ -84,7 +84,7 @@ def downloadImgs(imgDb: str, outDir: str, timeout: int) -> None: time.sleep(timeout) except Exception as e: print(f'Error while downloading to {outFile}: {e}') - if not BACKOFF: + if not EXP_BACKOFF: return else: timeout *= 2 @@ -94,7 +94,6 @@ def downloadImgs(imgDb: str, outDir: str, timeout: int) -> None: dbCon.close() if __name__ == '__main__': - import argparse parser = 
argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.parse_args() # -- cgit v1.2.3