diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-10-03 19:54:06 +1100 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-10-03 19:54:06 +1100 |
| commit | 0c6c79084bd0ba331c469b4203627f18eb3b8275 (patch) | |
| tree | c15b2d8b2bdd12a7dd212fd26cda3d0b79f24df1 /backend/hist_data/enwiki/download_imgs.py | |
| parent | 30851ce8a6bf60cba48de372e7c923167cc17d8a (diff) | |
Add exponential-backoff download option
Diffstat (limited to 'backend/hist_data/enwiki/download_imgs.py')
| -rwxr-xr-x | backend/hist_data/enwiki/download_imgs.py | 8 |
1 file changed, 7 insertions, 1 deletion
```diff
diff --git a/backend/hist_data/enwiki/download_imgs.py b/backend/hist_data/enwiki/download_imgs.py
index 664dd28..bbd2cda 100755
--- a/backend/hist_data/enwiki/download_imgs.py
+++ b/backend/hist_data/enwiki/download_imgs.py
@@ -22,6 +22,7 @@ USER_AGENT = 'terryt.dev (terry06890@gmail.com)'
 TIMEOUT = 1
 # https://en.wikipedia.org/wiki/Wikipedia:Database_download says to 'throttle to 1 cache miss per sec'
 # It's unclear how to properly check for cache misses, so we just aim for 1 per sec
+BACKOFF = False # If True, double the timeout each time a download error occurs (otherwise just exit)
 
 def downloadImgs(imgDb: str, outDir: str, timeout: int) -> None:
     if not os.path.exists(outDir):
@@ -83,7 +84,12 @@ def downloadImgs(imgDb: str, outDir: str, timeout: int) -> None:
             time.sleep(timeout)
         except Exception as e:
             print(f'Error while downloading to {outFile}: {e}')
-            return
+            if not BACKOFF:
+                return
+            else:
+                timeout *= 2
+                print(f'New timeout: {timeout}')
+                continue
     print('Closing database')
     dbCon.close()
 
```
