about | summary | refs | log | tree | commit | diff
path: root/backend/data/enwiki/downloadImgLicenseInfo.py
diff options
context:
space:
mode:
author Terry Truong <terry06890@gmail.com> 2022-06-19 14:50:38 +1000
committer Terry Truong <terry06890@gmail.com> 2022-06-19 14:50:38 +1000
commit 19c5a2b6c57b8c4245bb07773caa8df598b3feec (patch)
tree e944f66fff51448e75677982c3b3118856aeeb18 /backend/data/enwiki/downloadImgLicenseInfo.py
parent 8c8ed28c87e649f163aaa54899f4b00c9fc31224 (diff)
Make manual enwiki-desc-associations more flexible
Also enable getEnwikiImgData.py to skip already-processed wiki-ids
Diffstat (limited to 'backend/data/enwiki/downloadImgLicenseInfo.py')
-rwxr-xr-x backend/data/enwiki/downloadImgLicenseInfo.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/backend/data/enwiki/downloadImgLicenseInfo.py b/backend/data/enwiki/downloadImgLicenseInfo.py
index 5d99573..8231fbb 100755
--- a/backend/data/enwiki/downloadImgLicenseInfo.py
+++ b/backend/data/enwiki/downloadImgLicenseInfo.py
@@ -33,7 +33,7 @@ if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='im
# Get image names
print("Reading image names")
imgNames = set()
-for (imgName,) in dbCur.execute("SELECT DISTINCT img_name FROM page_imgs"):
+for (imgName,) in dbCur.execute("SELECT DISTINCT img_name FROM page_imgs WHERE img_name NOT NULL"):
imgNames.add(imgName)
print(f"Found {len(imgNames)} images")
oldSz = len(imgNames)