diff options
| author | Terry Truong <terry06890@gmail.com> | 2023-01-21 12:21:03 +1100 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2023-01-21 12:32:01 +1100 |
| commit | 0a9b2c2e5eca8a04e37fbdd423379882863237c2 (patch) | |
| tree | 1812bdb6bb13e4f76fdd7ef04075b291f775c213 /backend/hist_data/gen_desc_data.py | |
| parent | 8321e2f92dbc073b8f1de87895d6620a2021b22e (diff) | |
Adjust backend coding style
Increase line spacing, add section comments, etc
Diffstat (limited to 'backend/hist_data/gen_desc_data.py')
| -rwxr-xr-x | backend/hist_data/gen_desc_data.py | 14 |
1 files changed, 9 insertions, 5 deletions
```diff
diff --git a/backend/hist_data/gen_desc_data.py b/backend/hist_data/gen_desc_data.py
index 6c9fee2..bcd8870 100755
--- a/backend/hist_data/gen_desc_data.py
+++ b/backend/hist_data/gen_desc_data.py
@@ -5,7 +5,8 @@ Maps events to short descriptions from Wikipedia, and stores them in the databas
 """
 
 import argparse
-import os, sqlite3
+import os
+import sqlite3
 
 ENWIKI_DB = os.path.join('enwiki', 'desc_data.db')
 DB_FILE = 'data.db'
@@ -15,12 +16,12 @@ def genData(enwikiDb: str, dbFile: str) -> None:
     dbCon = sqlite3.connect(dbFile)
     dbCur = dbCon.cursor()
     dbCur.execute('CREATE TABLE descs (id INT PRIMARY KEY, wiki_id INT, desc TEXT)')
-    #
+
     print('Getting events')
     titleToId: dict[str, int] = {}
     for eventId, title in dbCur.execute('SELECT id, title FROM events'):
         titleToId[title] = eventId
-    #
+
     print('Getting Wikipedia descriptions')
     enwikiCon = sqlite3.connect(enwikiDb)
     enwikiCur = enwikiCon.cursor()
@@ -29,11 +30,13 @@ def genData(enwikiDb: str, dbFile: str) -> None:
         iterNum += 1
         if iterNum % 1e4 == 0:
             print(f'At iteration {iterNum}')
+
         # Get wiki ID
         row = enwikiCur.execute('SELECT id FROM pages WHERE title = ?', (title,)).fetchone()
         if row is None:
             continue
         wikiId = row[0]
+
         # Check for redirect
         wikiIdToGet = wikiId
         query = \
@@ -41,12 +44,13 @@ def genData(enwikiDb: str, dbFile: str) -> None:
         row = enwikiCur.execute(query, (wikiId,)).fetchone()
         if row is not None:
             wikiIdToGet = row[0]
+
         # Get desc
         row = enwikiCur.execute('SELECT desc FROM descs where id = ?', (wikiIdToGet,)).fetchone()
         if row is None:
             continue
         dbCur.execute('INSERT INTO descs VALUES (?, ?, ?)', (eventId, wikiId, row[0]))
-    #
+
     print('Closing databases')
     dbCon.commit()
     dbCon.close()
@@ -54,5 +58,5 @@ def genData(enwikiDb: str, dbFile: str) -> None:
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
     args = parser.parse_args()
-    #
+
     genData(ENWIKI_DB, DB_FILE)
```
