aboutsummaryrefslogtreecommitdiff
path: root/backend/hist_data/gen_desc_data.py
diff options
context:
space:
mode:
author: Terry Truong <terry06890@gmail.com> 2022-12-30 23:28:09 +1100
committer: Terry Truong <terry06890@gmail.com> 2022-12-30 23:33:37 +1100
commit: 0e5e46cedaaeacf59cfd0f2e30c1ae6923466870 (patch)
tree: 016b712ce1d4255895bbba11714e624df09cfc4a /backend/hist_data/gen_desc_data.py
parent: 086b0c30afdf2c0fbff48e1005b2f9220b028094 (diff)
Generate event_disp data before image-generation
Make gen_disp_data.py delete non-displayable events. Make reduce_event_data.py also delete from 'dist' and 'event_disp'. Remove MAX_IMGS_PER_CTG from enwiki/gen_img_data.py. Make gen_desc_data.py include events without images.
Diffstat (limited to 'backend/hist_data/gen_desc_data.py')
-rwxr-xr-x backend/hist_data/gen_desc_data.py 5
1 files changed, 2 insertions, 3 deletions
diff --git a/backend/hist_data/gen_desc_data.py b/backend/hist_data/gen_desc_data.py
index 68f9e56..0d7ee88 100755
--- a/backend/hist_data/gen_desc_data.py
+++ b/backend/hist_data/gen_desc_data.py
@@ -16,10 +16,9 @@ def genData(enwikiDb: str, dbFile: str) -> None:
dbCur = dbCon.cursor()
dbCur.execute('CREATE TABLE descs (id INT PRIMARY KEY, wiki_id INT, desc TEXT)')
#
- print('Getting events with images')
+ print('Getting events')
titleToId: dict[str, int] = {}
- query = 'SELECT events.id, events.title FROM events INNER JOIN event_imgs ON events.id = event_imgs.id'
- for eventId, title in dbCur.execute(query):
+ for eventId, title in dbCur.execute('SELECT id, title FROM events'):
titleToId[title] = eventId
#
print('Getting Wikipedia descriptions')