diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-12-30 23:28:09 +1100 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-12-30 23:33:37 +1100 |
| commit | 0e5e46cedaaeacf59cfd0f2e30c1ae6923466870 (patch) | |
| tree | 016b712ce1d4255895bbba11714e624df09cfc4a /backend/hist_data/gen_desc_data.py | |
| parent | 086b0c30afdf2c0fbff48e1005b2f9220b028094 (diff) | |
Generate event_disp data before image-generation
Make gen_disp_data.py delete non-displayable events
Make reduce_event_data.py also delete from 'dist' and 'event_disp'
Remove MAX_IMGS_PER_CTG from enwiki/gen_img_data.py
Make gen_desc_data.py include events without images
Diffstat (limited to 'backend/hist_data/gen_desc_data.py')
| -rwxr-xr-x | backend/hist_data/gen_desc_data.py | 5 |
1 file changed, 2 insertions, 3 deletions
diff --git a/backend/hist_data/gen_desc_data.py b/backend/hist_data/gen_desc_data.py
index 68f9e56..0d7ee88 100755
--- a/backend/hist_data/gen_desc_data.py
+++ b/backend/hist_data/gen_desc_data.py
@@ -16,10 +16,9 @@ def genData(enwikiDb: str, dbFile: str) -> None:
 	dbCur = dbCon.cursor()
 	dbCur.execute('CREATE TABLE descs (id INT PRIMARY KEY, wiki_id INT, desc TEXT)')
 	#
-	print('Getting events with images')
+	print('Getting events')
 	titleToId: dict[str, int] = {}
-	query = 'SELECT events.id, events.title FROM events INNER JOIN event_imgs ON events.id = event_imgs.id'
-	for eventId, title in dbCur.execute(query):
+	for eventId, title in dbCur.execute('SELECT id, title FROM events'):
 		titleToId[title] = eventId
 	#
 	print('Getting Wikipedia descriptions')
