aboutsummaryrefslogtreecommitdiff
path: root/backend/tolData/enwiki/genImgData.py
diff options
context:
space:
mode:
author: Terry Truong <terry06890@gmail.com> 2022-08-30 12:27:42 +1000
committer: Terry Truong <terry06890@gmail.com> 2022-08-30 12:27:42 +1000
commit: e8e58a3bb9dc233dacf573973457c5b48d369503 (patch)
tree: 242500ca304c5afbb7e6506e61da4c4dfff0b175 /backend/tolData/enwiki/genImgData.py
parent: 930c12d33e1093f874a4beb4d6376621e464e8c0 (diff)
Add scripts for generating eol/enwiki mappings
- New data sources: OTOL taxonomy, EOL provider-ids, Wikidata dump
- Add 'node_iucn' table
- Remove 'redirected' field from 'wiki_ids' table
- Make 'eol_ids' table have 'name' as the primary key
- Combine name-generation scripts into genNameData.py
- Combine description-generation scripts into genDescData.py
Diffstat (limited to 'backend/tolData/enwiki/genImgData.py')
-rwxr-xr-x backend/tolData/enwiki/genImgData.py 1
1 files changed, 0 insertions, 1 deletions
diff --git a/backend/tolData/enwiki/genImgData.py b/backend/tolData/enwiki/genImgData.py
index 97e696f..b5d546d 100755
--- a/backend/tolData/enwiki/genImgData.py
+++ b/backend/tolData/enwiki/genImgData.py
@@ -30,7 +30,6 @@ imageLineRegex = re.compile(r".*\| *image *= *([^|]*)")
bracketImageRegex = re.compile(r"\[\[(File:[^|]*).*]]")
imageNameRegex = re.compile(r".*\.(jpg|jpeg|png|gif|tiff|tif)", flags=re.IGNORECASE)
cssImgCropRegex = re.compile(r"{{css image crop\|image *= *(.*)", flags=re.IGNORECASE)
-# In testing, got about 360k image names
print("Getting input page-ids")
pageIds = getInputPageIds()