From 19c5a2b6c57b8c4245bb07773caa8df598b3feec Mon Sep 17 00:00:00 2001
From: Terry Truong
Date: Sun, 19 Jun 2022 14:50:38 +1000
Subject: Make manual enwiki-desc-associations more flexible

Also enable getEnwikiImgData.py to skip already-processed wiki-ids
---
 backend/data/enwiki/downloadImgLicenseInfo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'backend/data/enwiki/downloadImgLicenseInfo.py')

diff --git a/backend/data/enwiki/downloadImgLicenseInfo.py b/backend/data/enwiki/downloadImgLicenseInfo.py
index 5d99573..8231fbb 100755
--- a/backend/data/enwiki/downloadImgLicenseInfo.py
+++ b/backend/data/enwiki/downloadImgLicenseInfo.py
@@ -33,7 +33,7 @@ if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='im
 # Get image names
 print("Reading image names")
 imgNames = set()
-for (imgName,) in dbCur.execute("SELECT DISTINCT img_name FROM page_imgs"):
+for (imgName,) in dbCur.execute("SELECT DISTINCT img_name FROM page_imgs WHERE img_name NOT NULL"):
 	imgNames.add(imgName)
 print(f"Found {len(imgNames)} images")
 oldSz = len(imgNames)
-- 
cgit v1.2.3