From 0e5e46cedaaeacf59cfd0f2e30c1ae6923466870 Mon Sep 17 00:00:00 2001
From: Terry Truong
Date: Fri, 30 Dec 2022 23:28:09 +1100
Subject: Generate event_disp data before image-generation

Make gen_disp_data.py delete non-displayable events
Make reduce_event_data.py also delete from 'dist' and 'event_disp'
Remove MAX_IMGS_PER_CTG from enwiki/gen_img_data.py
Make gen_desc_data.py include events without images
---
 backend/hist_data/gen_desc_data.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'backend/hist_data/gen_desc_data.py')

diff --git a/backend/hist_data/gen_desc_data.py b/backend/hist_data/gen_desc_data.py
index 68f9e56..0d7ee88 100755
--- a/backend/hist_data/gen_desc_data.py
+++ b/backend/hist_data/gen_desc_data.py
@@ -16,10 +16,9 @@ def genData(enwikiDb: str, dbFile: str) -> None:
 	dbCur = dbCon.cursor()
 	dbCur.execute('CREATE TABLE descs (id INT PRIMARY KEY, wiki_id INT, desc TEXT)')
 	#
-	print('Getting events with images')
+	print('Getting events')
 	titleToId: dict[str, int] = {}
-	query = 'SELECT events.id, events.title FROM events INNER JOIN event_imgs ON events.id = event_imgs.id'
-	for eventId, title in dbCur.execute(query):
+	for eventId, title in dbCur.execute('SELECT id, title FROM events'):
 		titleToId[title] = eventId
 	#
 	print('Getting Wikipedia descriptions')
-- 
cgit v1.2.3