From e8e58a3bb9dc233dacf573973457c5b48d369503 Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Tue, 30 Aug 2022 12:27:42 +1000 Subject: Add scripts for generating eol/enwiki mappings - New data sources: OTOL taxonomy, EOL provider-ids, Wikidata dump - Add 'node_iucn' table - Remove 'redirected' field from 'wiki_ids' table - Make 'eol_ids' table have 'name' as the primary key - Combine name-generation scripts into genNameData.py - Combine description-generation scripts into genDescData.py --- backend/tolData/eol/downloadImgs.py | 3 --- 1 file changed, 3 deletions(-) (limited to 'backend/tolData/eol/downloadImgs.py') diff --git a/backend/tolData/eol/downloadImgs.py b/backend/tolData/eol/downloadImgs.py index 7ca4e79..4d658e7 100755 --- a/backend/tolData/eol/downloadImgs.py +++ b/backend/tolData/eol/downloadImgs.py @@ -22,8 +22,6 @@ highest EOL ID. """, formatter_class=argparse.RawDescriptionHelpFormatter) parser.parse_args() -# In testing, this script downloaded about 70k images, over a few days - imagesListDb = "imagesList.db" def getInputEolIds(): eolIds = set() @@ -95,7 +93,6 @@ def downloadImg(url, outFile): for idx in range(nextIdx, len(eolIds)): eolId = eolIds[idx] # Get image urls - imgDataList = [] ownerSet = set() # Used to get images from different owners, for variety exitLoop = False query = "SELECT content_id, copy_url, license, copyright_owner FROM images WHERE page_id = ?" -- cgit v1.2.3